{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "36467fbd",
   "metadata": {},
   "source": [
    "[代码参考链接 - 唐国梁Tommy - 机器学习实战](https://space.bilibili.com/474347248/channel/detail?cid=143235&ctype=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "72de5e24",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import scipy\n",
    "from scipy.optimize import curve_fit\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8116ac5c",
   "metadata": {},
   "source": [
    "### 1. 读取ERa, molecular对应的表格数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "c30ee7e4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['training', 'test']"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataERa_xls = pd.ExcelFile('../ERα_activity.xlsx')\n",
    "sheets = dataERa_xls.sheet_names\n",
    "sheets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "6938973f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1974, 3)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SMILES</th>\n",
       "      <th>IC50_nM</th>\n",
       "      <th>pIC50</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>107</th>\n",
       "      <td>Cc1ccc(cc1)N2CCc3cc(O)ccc3C2(C)c4ccc(OCCN5CCCC...</td>\n",
       "      <td>28.00</td>\n",
       "      <td>7.552842</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>62</th>\n",
       "      <td>CC(=C(c1ccc(O)cc1)c2ccc(O)cc2)c3ccccc3</td>\n",
       "      <td>30.00</td>\n",
       "      <td>7.522879</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1260</th>\n",
       "      <td>Cc1cccc2c(C)cc(SCC(=O)NC(=O)NCc3occc3)nc12</td>\n",
       "      <td>32039.87</td>\n",
       "      <td>4.494309</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1705</th>\n",
       "      <td>Cc1c(c2ccc(O)c(F)c2)n(Cc3ccc(OCCN4CCCCCC4)cc3)...</td>\n",
       "      <td>13.00</td>\n",
       "      <td>7.886057</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1336</th>\n",
       "      <td>O=Cc1ccc2OC3=C(C(=O)N4CCCSC4=N3)C(=O)c2c1</td>\n",
       "      <td>17800.63</td>\n",
       "      <td>4.749565</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                 SMILES   IC50_nM     pIC50\n",
       "107   Cc1ccc(cc1)N2CCc3cc(O)ccc3C2(C)c4ccc(OCCN5CCCC...     28.00  7.552842\n",
       "62               CC(=C(c1ccc(O)cc1)c2ccc(O)cc2)c3ccccc3     30.00  7.522879\n",
       "1260         Cc1cccc2c(C)cc(SCC(=O)NC(=O)NCc3occc3)nc12  32039.87  4.494309\n",
       "1705  Cc1c(c2ccc(O)c(F)c2)n(Cc3ccc(OCCN4CCCCCC4)cc3)...     13.00  7.886057\n",
       "1336          O=Cc1ccc2OC3=C(C(=O)N4CCCSC4=N3)C(=O)c2c1  17800.63  4.749565"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataERa_Train = pd.read_excel('../ERα_activity.xlsx', sheet_name=sheets[0])\n",
    "print(dataERa_Train.shape)\n",
    "dataERa_Train.sample(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e3109144",
   "metadata": {},
   "source": [
    "#### 验证 IC50_nM 与 PIC50 相关性"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "29f5a498",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          IC50_nM     pIC50\n",
      "IC50_nM  1.000000 -0.185963\n",
      "pIC50   -0.185963  1.000000 \n",
      "\n",
      "         IC50_nM  pIC50\n",
      "IC50_nM      1.0   -1.0\n",
      "pIC50       -1.0    1.0 \n",
      "\n",
      "         IC50_nM  pIC50\n",
      "IC50_nM      1.0   -1.0\n",
      "pIC50       -1.0    1.0\n"
     ]
    }
   ],
   "source": [
    "dataERa_Train_corr = dataERa_Train[['IC50_nM', 'pIC50']] \n",
    "print(dataERa_Train_corr.corr(method='pearson'),'\\n')\n",
    "print(dataERa_Train_corr.corr(method='kendall'),'\\n')\n",
    "print(dataERa_Train_corr.corr(method='spearman'))   # 非线性强相关"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "129edb99",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:xlabel='IC50_nM', ylabel='pIC50'>"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEHCAYAAACp9y31AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAXR0lEQVR4nO3dfZAc9X3n8c9npUUSCJC8WjCRIMIG7BgCAjYEh5iyeHBhhxPJ4Qe4PJicY1XOucQ+xwfOXcqUXZcqQ5ILJsnZp8OOIRfjYOQH4kd8PATHBtkrLIRksC3AgAhCayGBRIRYab/3x/Ti1Wp6Z2Y1093bv/erampnft0z/d2u1Ue/+XX3rx0RAgCko6/sAgAAxSL4ASAxBD8AJIbgB4DEEPwAkJjZZRfQjkWLFsXSpUvLLgMAZpS1a9f+NCIGJ7fPiOBfunSphoeHyy4DAGYU2483a2eoBwASQ/ADQGIIfgBIDMEPAIkh+AEgMbUO/m279uiBJ3do2649ZZcCAJUxI07nnI4vrXtKV61er/6+Po2OjenaS0/VimWLyy4LAEpXyx7/tl17dNXq9XpxdEw79+zVi6NjunL1enr+AKCaBv/m7bvV37f/r9bf16fN23eXVBEAVEctg3/JwnkaHRvbr210bExLFs4rqSIAqI5aBv/A/Dm69tJTNbe/T4fPma25/X269tJTNTB/TtmlAUDpantwd8WyxTrnhEXavH23liycR+gDQKa2wS81ev4EPgDsr5ZDPQCAfD0Lftufsr3V9oYJba+w/U3bP85+LuzV9iVp0zM7devwk9r0zM5ebgYAZpRe9vg/LemiSW0flHRHRJwo6Y7sdU986IsP6oK/ukcfuHW9Lvire/ShLz3Yq00BwIzSs+CPiHskPTup+RJJN2bPb5T0673Y9qZnduqm+57Yr+2me5+g5w8AKn6M/+iIeDp7vkXS0Xkr2l5pe9j28MjISEcbWffkjo7aASAlpR3cjYiQFFMsXxURQxExNDh4wC0jp7Ts2AUdtQNASooO/mdsHyNJ2c+tvdjIwsMO6agdAFJSdPDfJumd2fN3SvpSLzay8V+f66gdAFLSy9M5b5Z0r6TX2N5s+12SPirpQts/lnRB9roXW++wHQDS0bMrdyPi8pxF5/dqm+NO/rkjOmoHgJTU8srdP/7s/R21A0BKahn833ls8uUDU7cDQEpqGfwnvzJnqCenHQBSUsvg//O3L+uoHQBSUsvgf3hL86kZ8toBICW1DP5PffuRjtoBICW1DP5Htr7QUTsApKSWwX/Gsc2n+c9rB4CU1DL4n9u9p6N2AEhJLYN/w9PND+LmtQNASmoZ/P2zOmsHgJTUMvj3jTWfjC2vHQBSUsvgXzC3v6N2AEhJLYN/X859vfLaASAltQz+Q2Y1H9LJaweAlNQy+J/Z2fy0zbx2AEhJKcFv+722N9jeaPt93f78yBnSyWsHgJQUHvy2T5H0bklnSTpN0sW2T+jmNo45cm5H7QCQkjJ6/L8gaU1E/FtE7JX0z5L+fTc3sHXnix21A0BKygj+DZLeYHvA9qGS3iLp2Mkr2V5pe9j28MjISEcbeGmss3YASEnhwR8RD0m6RtLtkr4uaZ2kfU3WWxURQxExNDg42NE25uVcoZvXDgApKeXgbkR8MiLOjIhzJW2X9KNufj7n8QNAvtllbNT2URGx1fZxaozvn93Nz2eoBwDylRL8klbbHpA0KukPImJHSXUAQHJKCf6IeEMvP7/f0miTYZ1+LtwFgHpeueucgM9rB4CU1DL49+WM5ee1A0BKahn8fTk9+7x2AEhJPYM/57fKaweAlNQyCp1zvn5eOwCkpJbBzwVcAJCvlsHf7FTOqdoBICW1DH4AQL5aBn/eL1XLXxYAOlTLLMw7XZ/T+AGgpsEPAMhH8ANAYgh+AEgMwQ8AiSH4ASAxBD8AJKaU4Lf9X2xvtL3B9s2253bz8/s7bAeAlBQe/LYXS/oj
SUMRcYqkWZIu6+Y2RjtsB4CUlDXUM1vSPNuzJR0q6V9LqgMAklN48EfEU5L+QtITkp6W9FxE3D55PdsrbQ/bHh4ZGSm6TACorTKGehZKukTS8ZJ+TtJhtn9r8noRsSoihiJiaHBwsOgyAaC2yhjquUDSYxExEhGjkj4v6VdKqAMAklRG8D8h6Wzbh9q2pPMlPVRCHQCQpDLG+NdIulXS/ZIezGpYVXQdAJCq2WVsNCKulnR1rz5/7izpxX3N2wEgdbW8cjdybrGY1w4AKaln8HfYDgApqWXwv5Rzq628dgBISS2DHwCQj+AHgMQQ/ACQGIIfABJD8ANAYgh+AEgMwQ8AiSH4ASAxBD8AJIbgB4DEEPwAkBiCHwASQ/ADQGLKuNn6a2yvm/B43vb7iq4DAFJV+B24IuKHkpZJku1Zkp6S9IWi6wCAVJU91HO+pEci4vGS6wCAZJQd/JdJurnZAtsrbQ/bHh4ZGSm4LACor9KC3/YhklZI+lyz5RGxKiKGImJocHCw2OIAoMbK7PG/WdL9EfFMiTUAQHLKDP7LlTPMAwDonSmD3/aRtj9q+2Hbz9reZvuhrG3BdDdq+zBJF0r6/HQ/AwAwPa16/LdI2i7pjRHxiogYkLQ8a7tluhuNiBciYiAinpvuZwAApqdV8C+NiGsiYst4Q0RsiYhrJP18b0sDAPRCq+B/3PaVto8eb7B9tO2rJD3Z29IAAL3QKvjfIWlA0j9nY/zPSrpb0iskvb3HtQEAemDKKRsiYrukq7IHAKAGOpqrx/avSjpL0oaIuL03JQEAeqnV6ZzfnfD83ZL+RtLhkq62/cEe1wYA6IFWY/z9E56vlHRhRHxY0psk/WbPqgIA9EyroZ4+2wvV+A/CETEiNc7Dt72359UBALquVfAfKWmtJEsK28dExNO252dtAIAZptVZPUtzFu2T9BtdrwYA0HOtDu7+ku03N1m0XI1z+QEAM0yrg7vXSPpBk/aNkv68++UAAHqtVfAf3uy2iFnbot6UBADopVbBv3CKZYd2sxAAQDFaBf//s/1ntl8+g8cNH5F0Z29LAwD0QqvTOf9Y0g2SNtlel7WdJmlY0u/1sC4AQI+0Op3zBUmX236VpJOz5o0R8ejBbDS7e9cNkk6RFJL+Y0TcezCfCQBoz5TBb/uMCS+fyn4uGG+PiPunud2PSfp6RLzV9iHieAEAFKbVUM9fTngeE547e31epxu0faSkcyVdIUkR8ZKklzr9HADA9Ex5cDcilkfEcklvkfQVSc9J2iHptqxtOo6XNCLp72x/3/YN2c3X92N7pe1h28MjIyPT3BQAYLJWZ/WMu1HSL0i6XtJfS3qdpJumuc3Zks6Q9PGIOF3SC5IOmOI5IlZFxFBEDA0ODk5zUwCAydq9EcspEfG6Ca/vst3sit52bJa0OSLWZK9vVZPgBwD0Rrs9/vttnz3+wvYvq3FKZ8ciYoukJ22/Jms6X82nhQAA9EC7Pf4zJX3H9hPZ6+Mk/dD2g5IiIk7tcLt/KOkfsjN6HpX0ux2+HwAwTe0G/0Xd3GhErJM01M3PBAC0p63gbzZRGwBgZmp3jB8AUBMEPwAkhuAHgMQQ/ACQGIIfABJD8ANAYgh+AEgMwQ8AiSH4ASAxBD8AJCa54L/jB1vKLgEASpVc8P/3LzxYdgkAUKpaBv8vHXdE7rItO7m9L4C01TL4P/eeN5RdAgBUVi2DHwCQr90bsXSV7Z9I2ilpn6S9EcFNWQCgIKUEf2Z5RPy0xO0DQJIY6gGAxJQV/CHpdttrba9stoLtlbaHbQ+PjIwUXB4A1FdZwf+rEXGGpDdL+gPb505eISJWRcRQRAwNDg4WXyEA1FQpwR8RT2U/t0r6gqSzyqgDAFJUePDbPsz24ePPJb1J0oai6wCAVJVxVs/Rkr5ge3z7n4mIr5dQBwAkqfAef0Q8GhGnZY+TI+LPiq5h5afXFL1JAKiM2p7OOXhY/peZ2x/m8gEA6apt8H/00tPKLgEA
Kqm2wX/+615ZdgkAUEm1DX4AQHMEPwAkJtngv/i6u8suAQBKUevgX3TorNxlG7a8UGAlAFAdtQ7+b7x/edklAEDl1Dr4B+bPmXL5/77rxwVVAgDVUevgb+X6OzeVXQIAFK72wf+m1y7KXfbC6FiBlQBANdQ++Fdd8ctTLt+2a09BlQBANdQ++CXp7KULc5f9/b0/Ka4QAKiAJIL/f/zGL+Yuu+4OxvkBpCWJ4D/h6MO1ZMHc3OXv+Ph3CqwGAMqVRPBL0nXvWJa7bM3j27XpmZ3FFQMAJSot+G3Psv19218uYntDxw/o1YsOzV3+yX95tIgyAKB0Zfb43yvpoSI3eMcHluf+wjd/b7N++4b7iiwHAEpRSvDbXiLp1yTdUPS2r7/89Nxl39q0TcOPbSuwGgAoXlk9/uskXSkp9woq2yttD9seHhkZ6dqGX//qAXmK5f/nW491bVsAUEWFB7/tiyVtjYi1U60XEasiYigihgYHB7u2/YH5c/Sxy5Ypb97Ou344wkVdAGqtjB7/OZJW2P6JpM9KOs/2/y2ygBXLFuu7f3qBTjrqsAOW9c+27np4K+EPoLYKD/6I+JOIWBIRSyVdJunOiPitousYmD9HN698vQ6Ztf8ueGHPPl1920adc82dum3dU0WXBQA9l8x5/M0MzJ+jv3jbqZrb36fD5vxs8OeFl/bpxdExXbl6PT1/ALVTavBHxN0RcXGZNaxYtljfvuo8ffjfnazDDtl/5L+/r0+bt+8uqTIA6I2ke/zjBubP0fLXHqV9Efu1j46NacnCeSVVBQC9QfBnBubP0bWXNoZ9Dp8zW3P7+3Ttpae2vIsXAMw0s8suoEpWLFusc05YpM3bd2vJwnmEPoBaIvgnGZg/h8AHUGsM9QBAYgj+Htm2a48eeHIHp4MCqByGenrgS+ue0lWr16u/r0+jY2O69tJTtWLZ4rLLAgBJ9Pi7btuuPbpq9Xq9ODqmnXv2ciEYgMoh+Lts8/bd6u/bf7e2uhCMYSEARWKop8uWLJyn0bH9Z5ue6kIwhoUAFI0ef5d1ciEYw0IAykCPvwfavRBsfFjoxQn3oxkfFpr8nm279nBhGYCuIPh7pJ0LwdodFmI4CEA3MdRTonaGhRgOAtBt9PhL1mpYqJPhIABoB8FfAVMNC3V6lhAAtFLGzdbn2v6u7Qdsb7T94aJrmEmqOl001x4AM1cZPf49ks6LiF22+yX9i+2vRcR9JdQyI1RtumgONgMzW+HBHxEhaVf2sj97RP47IFVnuuiJB5vHjztcuXq9zjlhUSXqA9BaKWf12J5le52krZK+GRFrmqyz0vaw7eGRkZHCa0Rz05mSAkC1lBL8EbEvIpZJWiLpLNunNFlnVUQMRcTQ4OBg4TWiOQ42AzNfqefxR8QOSXdJuqjMOtC+qh5sBtC+wsf4bQ9KGo2IHbbnSbpQ0jVF14Hpq9rBZgCdKeOsnmMk3Wh7lhrfOG6JiC+XUAcOQlUONgPoXBln9ayXdHrR2wUANDBXDwAkhuAHgMQQ/ACQGIIfwIzA/FDdw+ycACqP+aG6ix4/gEpL9WZEvfyGQ48fQKWleDOiXn/DoccPoNJSmx+qiG84BD+ASkttfqgiZsBlqAdA5aU0P1QR33Do8QOYEQbmz9Fpxy6odehLxXzDoccPABXT6284BD8AVFAvZ8BlqAcAEkPwA0BiCH4ASAzBDwCJIfgBIDGOiLJraMn2iKTHp/n2RZJ+2sVyem0m1UutvTOT6p1JtUozq96DrfXnI2JwcuOMCP6DYXs4IobKrqNdM6leau2dmVTvTKpVmln19qpWhnoAIDEEPwAkJoXgX1V2AR2aSfVSa+/MpHpnUq3SzKq3J7XWfowfALC/FHr8AIAJCH4ASExtgt/2RbZ/aHuT7Q82WT7H9j9my9fYXlpCmeO1tKr1Ctsjttdlj98ro86slk/Z3mp7Q85y274++13W2z6j6Bon1dOq3jfafm7Cvv1Q0TVOqOVY23fZ/oHt
jbbf22SdSuzfNmut0r6da/u7th/I6v1wk3UqkQlt1trdTIiIGf+QNEvSI5JeJekQSQ9Iet2kdd4j6RPZ88sk/WOFa71C0t+UvV+zWs6VdIakDTnL3yLpa5Is6WxJaype7xslfbns/ZrVcoykM7Lnh0v6UZO/hUrs3zZrrdK+taT52fN+SWsknT1pnapkQju1djUT6tLjP0vSpoh4NCJekvRZSZdMWucSSTdmz2+VdL5tF1jjuHZqrYyIuEfSs1Oscomkm6LhPkkLbB9TTHUHaqPeyoiIpyPi/uz5TkkPSVo8abVK7N82a62MbH/tyl72Z4/JZ7JUIhParLWr6hL8iyU9OeH1Zh34R/nyOhGxV9JzkgYKqS6njkyzWiXp0uyr/a22jy2mtGlp9/epktdnX6u/ZvvksouRpGyY4XQ1ensTVW7/TlGrVKF9a3uW7XWStkr6ZkTk7tuSM6GdWqUuZkJdgr9u/knS0og4VdI39bNeCQ7e/WrMX3KapL+W9MVyy5Fsz5e0WtL7IuL5suuZSotaK7VvI2JfRCyTtETSWbZPKbOeqbRRa1czoS7B/5Skif8DLsnamq5je7akIyVtK6S6nDoyB9QaEdsiYk/28gZJZxZU23S0s+8rIyKeH/9aHRFfldRve1FZ9djuVyNI/yEiPt9klcrs31a1Vm3fjouIHZLuknTRpEVVyYSX5dXa7UyoS/B/T9KJto+3fYgaB2pum7TObZLemT1/q6Q7IztqUrCWtU4aw12hxnhqVd0m6Xeys0/OlvRcRDxddlF5bL9yfBzX9llq/Bso5R97VscnJT0UEf8zZ7VK7N92aq3Yvh20vSB7Pk/ShZIenrRaJTKhnVq7nQm1uNl6ROy1/Z8lfUONs2Y+FREbbX9E0nBE3KbGH+3f296kxsG/yypc6x/ZXiFpb1brFWXUKkm2b1bjbI1FtjdLulqNg0+KiE9I+qoaZ55skvRvkn63nEob2qj3rZL+k+29knZLuqykDoAknSPptyU9mI3vStJ/k3ScVLn9206tVdq3x0i60fYsNf4DuiUivlzFTGiz1q5mAlM2AEBi6jLUAwBoE8EPAIkh+AEgMQQ/ACSG4AeAinGLyQabrP92/2wCvc+0XJ+zegCgWmyfK2mXGvM0TXnFse0TJd0i6byI2G77qIjYOtV76PEjKbZ3TXh+ku2v2v6x7ftt32L7aNtLbe+eMAXuJya850zbD7oxle/105nUy43pi2Pi1Lq2l2VtHzj43xIzXbPJBm2/2vbXba+1/S3br80WvVvS30bE9uy9U4a+RPAjUbbnSvqKpI9HxIkRcYak/yVpMFvlkYhYlj1+f8JbP67GP7QTs8fkaQDatUHS2ye8vlyNKbqBPKsk/WFEnCnpA2r8vUrSSZJOsv1t2/fZbvk3WYsrd4Fp+A+S7o2IfxpviIi7pZdnnzxAdtn8Edn0yLJ9k6RfV2O+/Gbr363GDJbLJS2Q9K6I+Fa2+HFJR9g+Wo0ZGS9S4ypd4ADZ5Hi/IulzE75kzsl+zlajE/JGNeZyusf2L2bz/jRF8CNVp0haO8Xy421/X9Lzkv40C+zFakyLPK6dKZJnR8RZtt+ixvQRF0xYdqukt0n6vhozW+5p8n5AaozO7Mhm8Jxssxo36BmV9JjtH6nxH8H3pvowAPt7WtJxEXG6pPdL+oztI6b5WeOzWK6VtHTSslvUCP7LJd08zc9HArIpsB+z/Tbp5VtynpYt/qIavX1ls6GeJOnRqT6P4EeqNipnatuI2BMR27Lna9W4VeZJakzju2TCqu1MkTzei9+nSd+wI2KLpFE1ZmO8o8P6UWPZZIP3SnqN7c223yXpNyW9y/YDavz9jt+57xuSttn+gRpTOv/X8b/fPAz1IFWfkfQntn8tIr4ivXwK3bOSnpH0bETss/0qNb42PxoRz9p+PpseeY2k31HjhiMH40OSjsq2dZAfhbqIiMtzFh1w4DabAfX92aMtBD+SFBG7bV8s6Trb16nR
814v6b1q3LD9I7ZHJY1J+v2IGD+17j2SPi1pnhoHdZse2O2gju8czPuB6eACLgBIDGP8AJAYhnqAg2T7b9W4Q9VEH4uIvyujHqAVhnoAIDEM9QBAYgh+AEgMwQ8AiSH4ASAx/x8/0tRQ0vlkhAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "dataERa_Train_corr.plot(kind='scatter',x='IC50_nM',y=\"pIC50\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "05420ce3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([ 9.        , -0.43429448]),\n",
       " array([[ 1.98997914e-32, -2.65726378e-33],\n",
       "        [-2.65726378e-33,  4.78081407e-34]]))"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 对数拟合\n",
    "x = np.array(dataERa_Train_corr['IC50_nM'])\n",
    "y = np.array(dataERa_Train_corr['pIC50'])\n",
    "scipy.optimize.curve_fit(lambda t,a,b: a+b*np.log(t),  x,  y)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d14c9095",
   "metadata": {},
   "source": [
    "$$y=9 + (-0.434)\\times {log_e}^{x}$$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "b9dbfaee",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['training', 'test']"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataMol_xls = pd.ExcelFile('../Molecular_Descriptor.xlsx')\n",
    "sheets2 = dataMol_xls.sheet_names\n",
    "sheets2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "c91f8e82",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1974, 730)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SMILES</th>\n",
       "      <th>nAcid</th>\n",
       "      <th>ALogP</th>\n",
       "      <th>ALogp2</th>\n",
       "      <th>AMR</th>\n",
       "      <th>apol</th>\n",
       "      <th>naAromAtom</th>\n",
       "      <th>nAromBond</th>\n",
       "      <th>nAtom</th>\n",
       "      <th>nHeavyAtom</th>\n",
       "      <th>...</th>\n",
       "      <th>MW</th>\n",
       "      <th>WTPT-1</th>\n",
       "      <th>WTPT-2</th>\n",
       "      <th>WTPT-3</th>\n",
       "      <th>WTPT-4</th>\n",
       "      <th>WTPT-5</th>\n",
       "      <th>WPATH</th>\n",
       "      <th>WPOL</th>\n",
       "      <th>XLogP</th>\n",
       "      <th>Zagreb</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1204</th>\n",
       "      <td>CCCSc1nc(C)c(cc1C#N)c2csc(n2)C(=C3CCCC3)C#N</td>\n",
       "      <td>0</td>\n",
       "      <td>0.6436</td>\n",
       "      <td>0.414221</td>\n",
       "      <td>108.2910</td>\n",
       "      <td>58.735860</td>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>46</td>\n",
       "      <td>26</td>\n",
       "      <td>...</td>\n",
       "      <td>380.112939</td>\n",
       "      <td>52.632703</td>\n",
       "      <td>2.024335</td>\n",
       "      <td>17.320374</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>11.204101</td>\n",
       "      <td>1759</td>\n",
       "      <td>39</td>\n",
       "      <td>3.280</td>\n",
       "      <td>132</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1825</th>\n",
       "      <td>CC(F)(F)c1cc(F)ccc1c2sc3cc(O)ccc3c2Oc4ccc(\\C=C...</td>\n",
       "      <td>1</td>\n",
       "      <td>2.5558</td>\n",
       "      <td>6.532114</td>\n",
       "      <td>134.7918</td>\n",
       "      <td>63.114481</td>\n",
       "      <td>21</td>\n",
       "      <td>23</td>\n",
       "      <td>50</td>\n",
       "      <td>33</td>\n",
       "      <td>...</td>\n",
       "      <td>470.079965</td>\n",
       "      <td>67.121171</td>\n",
       "      <td>2.033975</td>\n",
       "      <td>21.069374</td>\n",
       "      <td>10.537409</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>3213</td>\n",
       "      <td>53</td>\n",
       "      <td>3.476</td>\n",
       "      <td>178</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>415</th>\n",
       "      <td>Oc1ccc(c(CC#N)c1)c2noc3cc(O)ccc23</td>\n",
       "      <td>0</td>\n",
       "      <td>0.4568</td>\n",
       "      <td>0.208666</td>\n",
       "      <td>79.7600</td>\n",
       "      <td>37.673930</td>\n",
       "      <td>15</td>\n",
       "      <td>17</td>\n",
       "      <td>30</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>266.069142</td>\n",
       "      <td>40.977670</td>\n",
       "      <td>2.048884</td>\n",
       "      <td>13.803019</td>\n",
       "      <td>8.212153</td>\n",
       "      <td>5.590865</td>\n",
       "      <td>782</td>\n",
       "      <td>31</td>\n",
       "      <td>1.038</td>\n",
       "      <td>106</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>523</th>\n",
       "      <td>COc1c(O)ccc2cc(ccc12)c3ccc(O)cc3</td>\n",
       "      <td>0</td>\n",
       "      <td>1.3352</td>\n",
       "      <td>1.782759</td>\n",
       "      <td>87.5229</td>\n",
       "      <td>41.661102</td>\n",
       "      <td>16</td>\n",
       "      <td>18</td>\n",
       "      <td>34</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>266.094294</td>\n",
       "      <td>41.030998</td>\n",
       "      <td>2.051550</td>\n",
       "      <td>7.884117</td>\n",
       "      <td>7.884117</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>820</td>\n",
       "      <td>33</td>\n",
       "      <td>2.606</td>\n",
       "      <td>106</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>156</th>\n",
       "      <td>CN(C)c1cccc(c1)N2CCc3cc(O)ccc3C2(C)c4ccc(OCCN5...</td>\n",
       "      <td>0</td>\n",
       "      <td>1.7022</td>\n",
       "      <td>2.897485</td>\n",
       "      <td>152.3737</td>\n",
       "      <td>82.375341</td>\n",
       "      <td>18</td>\n",
       "      <td>18</td>\n",
       "      <td>72</td>\n",
       "      <td>35</td>\n",
       "      <td>...</td>\n",
       "      <td>471.288577</td>\n",
       "      <td>72.222754</td>\n",
       "      <td>2.063507</td>\n",
       "      <td>15.599169</td>\n",
       "      <td>5.634024</td>\n",
       "      <td>9.965145</td>\n",
       "      <td>3841</td>\n",
       "      <td>60</td>\n",
       "      <td>3.620</td>\n",
       "      <td>190</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 730 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                 SMILES  nAcid   ALogP  \\\n",
       "1204        CCCSc1nc(C)c(cc1C#N)c2csc(n2)C(=C3CCCC3)C#N      0  0.6436   \n",
       "1825  CC(F)(F)c1cc(F)ccc1c2sc3cc(O)ccc3c2Oc4ccc(\\C=C...      1  2.5558   \n",
       "415                   Oc1ccc(c(CC#N)c1)c2noc3cc(O)ccc23      0  0.4568   \n",
       "523                    COc1c(O)ccc2cc(ccc12)c3ccc(O)cc3      0  1.3352   \n",
       "156   CN(C)c1cccc(c1)N2CCc3cc(O)ccc3C2(C)c4ccc(OCCN5...      0  1.7022   \n",
       "\n",
       "        ALogp2       AMR       apol  naAromAtom  nAromBond  nAtom  nHeavyAtom  \\\n",
       "1204  0.414221  108.2910  58.735860          11         12     46          26   \n",
       "1825  6.532114  134.7918  63.114481          21         23     50          33   \n",
       "415   0.208666   79.7600  37.673930          15         17     30          20   \n",
       "523   1.782759   87.5229  41.661102          16         18     34          20   \n",
       "156   2.897485  152.3737  82.375341          18         18     72          35   \n",
       "\n",
       "      ...          MW     WTPT-1    WTPT-2     WTPT-3     WTPT-4     WTPT-5  \\\n",
       "1204  ...  380.112939  52.632703  2.024335  17.320374   0.000000  11.204101   \n",
       "1825  ...  470.079965  67.121171  2.033975  21.069374  10.537409   0.000000   \n",
       "415   ...  266.069142  40.977670  2.048884  13.803019   8.212153   5.590865   \n",
       "523   ...  266.094294  41.030998  2.051550   7.884117   7.884117   0.000000   \n",
       "156   ...  471.288577  72.222754  2.063507  15.599169   5.634024   9.965145   \n",
       "\n",
       "      WPATH  WPOL  XLogP  Zagreb  \n",
       "1204   1759    39  3.280     132  \n",
       "1825   3213    53  3.476     178  \n",
       "415     782    31  1.038     106  \n",
       "523     820    33  2.606     106  \n",
       "156    3841    60  3.620     190  \n",
       "\n",
       "[5 rows x 730 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataMol_Train = pd.read_excel('../Molecular_Descriptor.xlsx', sheet_name=sheets[0])\n",
    "print(dataMol_Train.shape)\n",
    "dataMol_Train.sample(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "32eae9e6",
   "metadata": {},
   "source": [
    "### 2. 将ERa, Molecular 表格合并 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "8d4db053",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SMILES</th>\n",
       "      <th>IC50_nM</th>\n",
       "      <th>pIC50</th>\n",
       "      <th>nAcid</th>\n",
       "      <th>ALogP</th>\n",
       "      <th>ALogp2</th>\n",
       "      <th>AMR</th>\n",
       "      <th>apol</th>\n",
       "      <th>naAromAtom</th>\n",
       "      <th>nAromBond</th>\n",
       "      <th>...</th>\n",
       "      <th>MW</th>\n",
       "      <th>WTPT-1</th>\n",
       "      <th>WTPT-2</th>\n",
       "      <th>WTPT-3</th>\n",
       "      <th>WTPT-4</th>\n",
       "      <th>WTPT-5</th>\n",
       "      <th>WPATH</th>\n",
       "      <th>WPOL</th>\n",
       "      <th>XLogP</th>\n",
       "      <th>Zagreb</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Oc1ccc2O[C@H]([C@H](Sc2c1)C3CCCC3)c4ccc(OCCN5C...</td>\n",
       "      <td>2.5</td>\n",
       "      <td>8.602060</td>\n",
       "      <td>0</td>\n",
       "      <td>-0.2860</td>\n",
       "      <td>0.081796</td>\n",
       "      <td>126.1188</td>\n",
       "      <td>74.170169</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>...</td>\n",
       "      <td>439.218115</td>\n",
       "      <td>64.771680</td>\n",
       "      <td>2.089409</td>\n",
       "      <td>15.471445</td>\n",
       "      <td>8.858910</td>\n",
       "      <td>3.406628</td>\n",
       "      <td>3011</td>\n",
       "      <td>47</td>\n",
       "      <td>4.666</td>\n",
       "      <td>166</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Oc1ccc2O[C@H]([C@H](Sc2c1)C3CCCCCC3)c4ccc(OCCN...</td>\n",
       "      <td>7.5</td>\n",
       "      <td>8.124939</td>\n",
       "      <td>0</td>\n",
       "      <td>-0.8620</td>\n",
       "      <td>0.743044</td>\n",
       "      <td>131.9420</td>\n",
       "      <td>80.357341</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>...</td>\n",
       "      <td>467.249415</td>\n",
       "      <td>68.960024</td>\n",
       "      <td>2.089698</td>\n",
       "      <td>15.486947</td>\n",
       "      <td>8.863774</td>\n",
       "      <td>3.406648</td>\n",
       "      <td>3516</td>\n",
       "      <td>54</td>\n",
       "      <td>5.804</td>\n",
       "      <td>174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Oc1ccc(cc1)[C@H]2Sc3cc(O)ccc3O[C@H]2c4ccc(OCCN...</td>\n",
       "      <td>3.1</td>\n",
       "      <td>8.508638</td>\n",
       "      <td>0</td>\n",
       "      <td>0.7296</td>\n",
       "      <td>0.532316</td>\n",
       "      <td>139.9304</td>\n",
       "      <td>74.064997</td>\n",
       "      <td>18</td>\n",
       "      <td>18</td>\n",
       "      <td>...</td>\n",
       "      <td>463.181729</td>\n",
       "      <td>68.748923</td>\n",
       "      <td>2.083301</td>\n",
       "      <td>18.011114</td>\n",
       "      <td>11.390412</td>\n",
       "      <td>3.406644</td>\n",
       "      <td>3542</td>\n",
       "      <td>52</td>\n",
       "      <td>2.964</td>\n",
       "      <td>176</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Oc1ccc2O[C@H]([C@@H](CC3CCCCC3)Sc2c1)c4ccc(OCC...</td>\n",
       "      <td>3.9</td>\n",
       "      <td>8.408935</td>\n",
       "      <td>0</td>\n",
       "      <td>-0.3184</td>\n",
       "      <td>0.101379</td>\n",
       "      <td>133.4822</td>\n",
       "      <td>80.357341</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>...</td>\n",
       "      <td>467.249415</td>\n",
       "      <td>68.883696</td>\n",
       "      <td>2.087385</td>\n",
       "      <td>15.468365</td>\n",
       "      <td>8.857943</td>\n",
       "      <td>3.406624</td>\n",
       "      <td>3594</td>\n",
       "      <td>50</td>\n",
       "      <td>6.015</td>\n",
       "      <td>174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Oc1ccc2O[C@H]([C@@H](Cc3ccccc3)Sc2c1)c4ccc(OCC...</td>\n",
       "      <td>7.4</td>\n",
       "      <td>8.130768</td>\n",
       "      <td>0</td>\n",
       "      <td>1.3551</td>\n",
       "      <td>1.836296</td>\n",
       "      <td>143.1903</td>\n",
       "      <td>76.356583</td>\n",
       "      <td>18</td>\n",
       "      <td>18</td>\n",
       "      <td>...</td>\n",
       "      <td>461.202465</td>\n",
       "      <td>68.883696</td>\n",
       "      <td>2.087385</td>\n",
       "      <td>15.468365</td>\n",
       "      <td>8.857943</td>\n",
       "      <td>3.406624</td>\n",
       "      <td>3594</td>\n",
       "      <td>50</td>\n",
       "      <td>4.462</td>\n",
       "      <td>174</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 732 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                              SMILES  IC50_nM     pIC50  \\\n",
       "0  Oc1ccc2O[C@H]([C@H](Sc2c1)C3CCCC3)c4ccc(OCCN5C...      2.5  8.602060   \n",
       "1  Oc1ccc2O[C@H]([C@H](Sc2c1)C3CCCCCC3)c4ccc(OCCN...      7.5  8.124939   \n",
       "2  Oc1ccc(cc1)[C@H]2Sc3cc(O)ccc3O[C@H]2c4ccc(OCCN...      3.1  8.508638   \n",
       "3  Oc1ccc2O[C@H]([C@@H](CC3CCCCC3)Sc2c1)c4ccc(OCC...      3.9  8.408935   \n",
       "4  Oc1ccc2O[C@H]([C@@H](Cc3ccccc3)Sc2c1)c4ccc(OCC...      7.4  8.130768   \n",
       "\n",
       "   nAcid   ALogP    ALogp2       AMR       apol  naAromAtom  nAromBond  ...  \\\n",
       "0      0 -0.2860  0.081796  126.1188  74.170169          12         12  ...   \n",
       "1      0 -0.8620  0.743044  131.9420  80.357341          12         12  ...   \n",
       "2      0  0.7296  0.532316  139.9304  74.064997          18         18  ...   \n",
       "3      0 -0.3184  0.101379  133.4822  80.357341          12         12  ...   \n",
       "4      0  1.3551  1.836296  143.1903  76.356583          18         18  ...   \n",
       "\n",
       "           MW     WTPT-1    WTPT-2     WTPT-3     WTPT-4    WTPT-5  WPATH  \\\n",
       "0  439.218115  64.771680  2.089409  15.471445   8.858910  3.406628   3011   \n",
       "1  467.249415  68.960024  2.089698  15.486947   8.863774  3.406648   3516   \n",
       "2  463.181729  68.748923  2.083301  18.011114  11.390412  3.406644   3542   \n",
       "3  467.249415  68.883696  2.087385  15.468365   8.857943  3.406624   3594   \n",
       "4  461.202465  68.883696  2.087385  15.468365   8.857943  3.406624   3594   \n",
       "\n",
       "   WPOL  XLogP  Zagreb  \n",
       "0    47  4.666     166  \n",
       "1    54  5.804     174  \n",
       "2    52  2.964     176  \n",
       "3    50  6.015     174  \n",
       "4    50  4.462     174  \n",
       "\n",
       "[5 rows x 732 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_train = pd.merge(dataERa_Train,dataMol_Train,  on='SMILES')\n",
    "data_train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "26fa3f98",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pIC50</th>\n",
       "      <th>nAcid</th>\n",
       "      <th>ALogP</th>\n",
       "      <th>ALogp2</th>\n",
       "      <th>AMR</th>\n",
       "      <th>apol</th>\n",
       "      <th>naAromAtom</th>\n",
       "      <th>nAromBond</th>\n",
       "      <th>nAtom</th>\n",
       "      <th>nHeavyAtom</th>\n",
       "      <th>...</th>\n",
       "      <th>MW</th>\n",
       "      <th>WTPT-1</th>\n",
       "      <th>WTPT-2</th>\n",
       "      <th>WTPT-3</th>\n",
       "      <th>WTPT-4</th>\n",
       "      <th>WTPT-5</th>\n",
       "      <th>WPATH</th>\n",
       "      <th>WPOL</th>\n",
       "      <th>XLogP</th>\n",
       "      <th>Zagreb</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>8.602060</td>\n",
       "      <td>0</td>\n",
       "      <td>-0.2860</td>\n",
       "      <td>0.081796</td>\n",
       "      <td>126.1188</td>\n",
       "      <td>74.170169</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>64</td>\n",
       "      <td>31</td>\n",
       "      <td>...</td>\n",
       "      <td>439.218115</td>\n",
       "      <td>64.771680</td>\n",
       "      <td>2.089409</td>\n",
       "      <td>15.471445</td>\n",
       "      <td>8.858910</td>\n",
       "      <td>3.406628</td>\n",
       "      <td>3011</td>\n",
       "      <td>47</td>\n",
       "      <td>4.666</td>\n",
       "      <td>166</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>8.124939</td>\n",
       "      <td>0</td>\n",
       "      <td>-0.8620</td>\n",
       "      <td>0.743044</td>\n",
       "      <td>131.9420</td>\n",
       "      <td>80.357341</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>70</td>\n",
       "      <td>33</td>\n",
       "      <td>...</td>\n",
       "      <td>467.249415</td>\n",
       "      <td>68.960024</td>\n",
       "      <td>2.089698</td>\n",
       "      <td>15.486947</td>\n",
       "      <td>8.863774</td>\n",
       "      <td>3.406648</td>\n",
       "      <td>3516</td>\n",
       "      <td>54</td>\n",
       "      <td>5.804</td>\n",
       "      <td>174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>8.508638</td>\n",
       "      <td>0</td>\n",
       "      <td>0.7296</td>\n",
       "      <td>0.532316</td>\n",
       "      <td>139.9304</td>\n",
       "      <td>74.064997</td>\n",
       "      <td>18</td>\n",
       "      <td>18</td>\n",
       "      <td>62</td>\n",
       "      <td>33</td>\n",
       "      <td>...</td>\n",
       "      <td>463.181729</td>\n",
       "      <td>68.748923</td>\n",
       "      <td>2.083301</td>\n",
       "      <td>18.011114</td>\n",
       "      <td>11.390412</td>\n",
       "      <td>3.406644</td>\n",
       "      <td>3542</td>\n",
       "      <td>52</td>\n",
       "      <td>2.964</td>\n",
       "      <td>176</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>8.408935</td>\n",
       "      <td>0</td>\n",
       "      <td>-0.3184</td>\n",
       "      <td>0.101379</td>\n",
       "      <td>133.4822</td>\n",
       "      <td>80.357341</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>70</td>\n",
       "      <td>33</td>\n",
       "      <td>...</td>\n",
       "      <td>467.249415</td>\n",
       "      <td>68.883696</td>\n",
       "      <td>2.087385</td>\n",
       "      <td>15.468365</td>\n",
       "      <td>8.857943</td>\n",
       "      <td>3.406624</td>\n",
       "      <td>3594</td>\n",
       "      <td>50</td>\n",
       "      <td>6.015</td>\n",
       "      <td>174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>8.130768</td>\n",
       "      <td>0</td>\n",
       "      <td>1.3551</td>\n",
       "      <td>1.836296</td>\n",
       "      <td>143.1903</td>\n",
       "      <td>76.356583</td>\n",
       "      <td>18</td>\n",
       "      <td>18</td>\n",
       "      <td>64</td>\n",
       "      <td>33</td>\n",
       "      <td>...</td>\n",
       "      <td>461.202465</td>\n",
       "      <td>68.883696</td>\n",
       "      <td>2.087385</td>\n",
       "      <td>15.468365</td>\n",
       "      <td>8.857943</td>\n",
       "      <td>3.406624</td>\n",
       "      <td>3594</td>\n",
       "      <td>50</td>\n",
       "      <td>4.462</td>\n",
       "      <td>174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1969</th>\n",
       "      <td>6.131944</td>\n",
       "      <td>0</td>\n",
       "      <td>1.8193</td>\n",
       "      <td>3.309852</td>\n",
       "      <td>177.6817</td>\n",
       "      <td>89.159790</td>\n",
       "      <td>24</td>\n",
       "      <td>24</td>\n",
       "      <td>73</td>\n",
       "      <td>43</td>\n",
       "      <td>...</td>\n",
       "      <td>598.166139</td>\n",
       "      <td>88.709996</td>\n",
       "      <td>2.063023</td>\n",
       "      <td>25.470481</td>\n",
       "      <td>21.946991</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>7121</td>\n",
       "      <td>70</td>\n",
       "      <td>2.526</td>\n",
       "      <td>236</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1970</th>\n",
       "      <td>5.850781</td>\n",
       "      <td>0</td>\n",
       "      <td>1.6903</td>\n",
       "      <td>2.857114</td>\n",
       "      <td>167.6057</td>\n",
       "      <td>82.972618</td>\n",
       "      <td>24</td>\n",
       "      <td>24</td>\n",
       "      <td>67</td>\n",
       "      <td>41</td>\n",
       "      <td>...</td>\n",
       "      <td>570.134839</td>\n",
       "      <td>84.662088</td>\n",
       "      <td>2.064929</td>\n",
       "      <td>24.928962</td>\n",
       "      <td>21.405589</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6171</td>\n",
       "      <td>66</td>\n",
       "      <td>1.884</td>\n",
       "      <td>228</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1971</th>\n",
       "      <td>7.721246</td>\n",
       "      <td>0</td>\n",
       "      <td>1.6903</td>\n",
       "      <td>2.857114</td>\n",
       "      <td>167.6057</td>\n",
       "      <td>82.972618</td>\n",
       "      <td>24</td>\n",
       "      <td>24</td>\n",
       "      <td>67</td>\n",
       "      <td>41</td>\n",
       "      <td>...</td>\n",
       "      <td>570.134839</td>\n",
       "      <td>84.660642</td>\n",
       "      <td>2.064894</td>\n",
       "      <td>24.923083</td>\n",
       "      <td>21.400883</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6421</td>\n",
       "      <td>66</td>\n",
       "      <td>1.884</td>\n",
       "      <td>228</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1972</th>\n",
       "      <td>7.886057</td>\n",
       "      <td>0</td>\n",
       "      <td>1.3365</td>\n",
       "      <td>1.786232</td>\n",
       "      <td>125.5605</td>\n",
       "      <td>63.287860</td>\n",
       "      <td>18</td>\n",
       "      <td>18</td>\n",
       "      <td>51</td>\n",
       "      <td>31</td>\n",
       "      <td>...</td>\n",
       "      <td>436.098059</td>\n",
       "      <td>64.171346</td>\n",
       "      <td>2.070043</td>\n",
       "      <td>19.841924</td>\n",
       "      <td>16.326873</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2583</td>\n",
       "      <td>50</td>\n",
       "      <td>0.782</td>\n",
       "      <td>174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1973</th>\n",
       "      <td>7.568636</td>\n",
       "      <td>0</td>\n",
       "      <td>1.8193</td>\n",
       "      <td>3.309852</td>\n",
       "      <td>177.6817</td>\n",
       "      <td>89.159790</td>\n",
       "      <td>24</td>\n",
       "      <td>24</td>\n",
       "      <td>73</td>\n",
       "      <td>43</td>\n",
       "      <td>...</td>\n",
       "      <td>598.166139</td>\n",
       "      <td>88.708522</td>\n",
       "      <td>2.062989</td>\n",
       "      <td>25.464529</td>\n",
       "      <td>21.942236</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>7421</td>\n",
       "      <td>70</td>\n",
       "      <td>2.526</td>\n",
       "      <td>236</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1974 rows × 730 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         pIC50  nAcid   ALogP    ALogp2       AMR       apol  naAromAtom  \\\n",
       "0     8.602060      0 -0.2860  0.081796  126.1188  74.170169          12   \n",
       "1     8.124939      0 -0.8620  0.743044  131.9420  80.357341          12   \n",
       "2     8.508638      0  0.7296  0.532316  139.9304  74.064997          18   \n",
       "3     8.408935      0 -0.3184  0.101379  133.4822  80.357341          12   \n",
       "4     8.130768      0  1.3551  1.836296  143.1903  76.356583          18   \n",
       "...        ...    ...     ...       ...       ...        ...         ...   \n",
       "1969  6.131944      0  1.8193  3.309852  177.6817  89.159790          24   \n",
       "1970  5.850781      0  1.6903  2.857114  167.6057  82.972618          24   \n",
       "1971  7.721246      0  1.6903  2.857114  167.6057  82.972618          24   \n",
       "1972  7.886057      0  1.3365  1.786232  125.5605  63.287860          18   \n",
       "1973  7.568636      0  1.8193  3.309852  177.6817  89.159790          24   \n",
       "\n",
       "      nAromBond  nAtom  nHeavyAtom  ...          MW     WTPT-1    WTPT-2  \\\n",
       "0            12     64          31  ...  439.218115  64.771680  2.089409   \n",
       "1            12     70          33  ...  467.249415  68.960024  2.089698   \n",
       "2            18     62          33  ...  463.181729  68.748923  2.083301   \n",
       "3            12     70          33  ...  467.249415  68.883696  2.087385   \n",
       "4            18     64          33  ...  461.202465  68.883696  2.087385   \n",
       "...         ...    ...         ...  ...         ...        ...       ...   \n",
       "1969         24     73          43  ...  598.166139  88.709996  2.063023   \n",
       "1970         24     67          41  ...  570.134839  84.662088  2.064929   \n",
       "1971         24     67          41  ...  570.134839  84.660642  2.064894   \n",
       "1972         18     51          31  ...  436.098059  64.171346  2.070043   \n",
       "1973         24     73          43  ...  598.166139  88.708522  2.062989   \n",
       "\n",
       "         WTPT-3     WTPT-4    WTPT-5  WPATH  WPOL  XLogP  Zagreb  \n",
       "0     15.471445   8.858910  3.406628   3011    47  4.666     166  \n",
       "1     15.486947   8.863774  3.406648   3516    54  5.804     174  \n",
       "2     18.011114  11.390412  3.406644   3542    52  2.964     176  \n",
       "3     15.468365   8.857943  3.406624   3594    50  6.015     174  \n",
       "4     15.468365   8.857943  3.406624   3594    50  4.462     174  \n",
       "...         ...        ...       ...    ...   ...    ...     ...  \n",
       "1969  25.470481  21.946991  0.000000   7121    70  2.526     236  \n",
       "1970  24.928962  21.405589  0.000000   6171    66  1.884     228  \n",
       "1971  24.923083  21.400883  0.000000   6421    66  1.884     228  \n",
       "1972  19.841924  16.326873  0.000000   2583    50  0.782     174  \n",
       "1973  25.464529  21.942236  0.000000   7421    70  2.526     236  \n",
       "\n",
       "[1974 rows x 730 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataFeature = data_train[data_train.columns[2:732].tolist()]\n",
    "dataFeature"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "414b337b",
   "metadata": {},
   "source": [
    "`pIC50`的数据分布"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "01eea4cd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    1974.000000\n",
       "mean        6.586112\n",
       "std         1.423023\n",
       "min         2.455932\n",
       "25%         5.382154\n",
       "50%         6.580871\n",
       "75%         7.568235\n",
       "max        10.337242\n",
       "Name: pIC50, dtype: float64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataFeature['pIC50'].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "76638fa2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAtMAAAHWCAYAAAC10QUfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAcgElEQVR4nO3da7Bld1nn8d9jGkmISLi0mZgONmqGiBdCaCIO6mAYFIiS6CgTykuKShmmJs7AaI00lDVg1VAVqkajTI2UgSBB5RKDSIZEhhBQyxcmdiBCQmBoIZhuAmm5imgw4ZkXZ0WOodPZ59+9zj47+XyqTp21/nvtnad3dVW+vc46a1d3BwAA2LivW/YAAACwqsQ0AAAMEtMAADBITAMAwCAxDQAAg8Q0AAAMmj2mq+qoqnpfVb192n9MVV1bVXur6s1V9fXT+oOn/b3T4zvnng0AAA7HZpyZfkGSm9ftvyLJRd397Uk+m+S8af28JJ+d1i+ajgMAgC1r1piuqh1Jzkzymmm/kpyR5PLpkEuTnD1tnzXtZ3r8adPxAACwJc19Zvo3kvxykq9M+49M8rnuvnPa35fkxGn7xCS3Jsn0+Oen4wEAYEvaNtcLV9WPJrm9u6+vqqcewdc9P8n5SXLsscc+8ZRTTjlSLw0AAAd1/fXX/213b7/n+mwxneQpSZ5dVc9KcnSSb0zym0mOq6pt09nnHUn2T8fvT3JSkn1VtS3Jw5J8+p4v2t0XJ7k4SXbt2tV79uyZ8Y8AAABJVX38YOuzXebR3S/u7h3dvTPJOUne3d0/neQ9SX5yOuzcJG+btq+Y9jM9/u7u7rnmAwCAw7WM+0y/KMkvVtXerF0Tfcm0fkmSR07rv5hk9xJmAwCAhc15mcc/6+4/SfIn0/ZHk5x+kGP+MclPbcY8AABwJPgERAAAGCSmAQBgkJgGAIBBYhoAAAaJaQAAGCSmAQBgkJgGAIBBYhoAAAaJaQAAGCSmAQBgkJgGAIBBYhoAAAaJaQAAGCSmAQBgkJgGAIBBYhoAAAZtW/YAAGyunbuvXPYIm+6WC89c9gjA/ZQz0wAAMEhMAwDAIDENAACDxDQAAAwS0wAAMEhMAwDAIDENAACDxDQAAAwS0wAAMEhMAwDAIDENAACDxDQAAAwS0wAAMEhMAwDAIDENAACDxDQAAAwS0wAAMEhMAwDAIDENAACDxDQAAAwS0wAAMEhMAwDAIDENAACDxDQAAAwS0wAAMEhMAwDAIDENAACDxDQAAAwS0wAAMGi2mK6qo6vquqr6q6q6qap+dVp/XVV9rKpumL5Ondarql5ZVXur6v1VddpcswEAwJGwbcbXviPJGd39xap6UJI/r6o/nh77b919+T2Of2aSk6ev703yquk7AABsSbOdme41X5x2HzR99SGeclaS10/P+4skx1XVCXPNBwAAh2vWa6ar6qiquiHJ7Umu7u5rp4dePl3KcVFVPXhaOzHJreuevm9au+drnl9Ve6pqz4EDB+YcHwAADmnWmO7uu7r71CQ7kpxeVd+V5MVJTknypCSPSPKiDb7mxd29q7t3bd++/UiPDAAAC9uUu3l09+eSvCfJM7r7tulSjjuS/E6S06fD9ic5ad3TdkxrAACwJc15N4/tVXXctH1Mkqcn+dDd10FXVSU5O8mN01OuSPJz0109npzk891921zzAQDA4Zrzbh4nJLm0qo7KWrRf1t1vr6p3V9X2JJXkhiT/cTr+qiTPSrI3yZeSPG/G2QAA4LDNFtPd/f4kTzjI+hn3cnwnuWCueQAA4EjzCYgAADBITAMAwCAxDQAAg8Q0AAAMEtMAADBITAMAwCAxDQAAg8Q0AAAMEtMAADBITAMAwCAxDQAAg7YtewCAZdq5+8pljwDACnNmGgAABolpAAAYJKYBAGCQmAYAgEFiGgAABolpAAAYJKYBAGCQmAYAgEFiGgAABolpAAAYJKYBAGCQmAYAgEFiGgAABm1b
9gDA1rFz95XLHgEAVooz0wAAMEhMAwDAIDENAACDxDQAAAwS0wAAMEhMAwDAIDENAACDxDQAAAwS0wAAMEhMAwDAIDENAACDxDQAAAwS0wAAMEhMAwDAIDENAACDxDQAAAwS0wAAMGjbsgcAgLnt3H3lskfYdLdceOayR4AHBGemAQBg0GwxXVVHV9V1VfVXVXVTVf3qtP6Yqrq2qvZW1Zur6uun9QdP+3unx3fONRsAABwJc56ZviPJGd39+CSnJnlGVT05ySuSXNTd357ks0nOm44/L8lnp/WLpuMAAGDLmi2me80Xp90HTV+d5Iwkl0/rlyY5e9o+a9rP9PjTqqrmmg8AAA7XrNdMV9VRVXVDktuTXJ3kr5N8rrvvnA7Zl+TEafvEJLcmyfT455M8cs75AADgcMwa0919V3efmmRHktOTnHK4r1lV51fVnqrac+DAgcN9OQAAGLYpd/Po7s8leU+S70tyXFXdfUu+HUn2T9v7k5yUJNPjD0vy6YO81sXdvau7d23fvn3u0QEA4F7NeTeP7VV13LR9TJKnJ7k5a1H9k9Nh5yZ527R9xbSf6fF3d3fPNR8AAByuOT+05YQkl1bVUVmL9su6++1V9cEkb6qq/5HkfUkumY6/JMnvVtXeJJ9Jcs6MswEAwGGbLaa7+/1JnnCQ9Y9m7frpe67/Y5KfmmseAAA40nwCIgAADBLTAAAwSEwDAMAgMQ0AAIPENAAADBLTAAAwSEwDAMAgMQ0AAIPENAAADBLTAAAwSEwDAMAgMQ0AAIPENAAADBLTAAAwSEwDAMAgMQ0AAIPENAAADBLTAAAwSEwDAMAgMQ0AAIPENAAADBLTAAAwSEwDAMAgMQ0AAIPENAAADBLTAAAwSEwDAMAgMQ0AAIPENAAADBLTAAAwSEwDAMAgMQ0AAIPENAAADBLTAAAwSEwDAMAgMQ0AAIPENAAADBLTAAAwSEwDAMAgMQ0AAIPENAAADBLTAAAwSEwDAMAgMQ0AAIPENAAADJotpqvqpKp6T1V9sKpuqqoXTOsvq6r9VXXD9PWsdc95cVXtraoPV9WPzDUbAAAcCdtmfO07k/xSd7+3qh6a5Pqqunp67KLu/p/rD66qxyU5J8l3JvnmJO+qqn/d3XfNOCMAAAyb7cx0d9/W3e+dtv8uyc1JTjzEU85K8qbuvqO7P5Zkb5LT55oPAAAO16ZcM11VO5M8Icm109IvVNX7q+q1VfXwae3EJLeue9q+HDq+AQBgqWaP6ar6hiRvSfLC7v5Cklcl+bYkpya5LcmvbfD1zq+qPVW158CBA0d6XAAAWNisMV1VD8paSP9+d/9hknT3p7r7ru7+SpJX56uXcuxPctK6p++Y1v6F7r64u3d1967t27fPOT4AABzSnHfzqCSXJLm5u3993foJ6w778SQ3TttXJDmnqh5cVY9JcnKS6+aaDwAADtecd/N4SpKfTfKBqrphWntJkudW1alJOsktSZ6fJN19U1VdluSDWbsTyAXu5AEAwFY2W0x3958nqYM8dNUhnvPyJC+fayYAADiS5jwzDStt5+4rlz0CALDF+ThxAAAYJKYBAGCQmAYAgEFiGgAABolpAAAYJKYBAGCQmAYAgEFiGgAABolpAAAYJKYBAGCQmAYAgEFiGgAABolpAAAYJKYBAGCQmAYAgEFiGgAABolpAAAYJKYBAGCQmAYAgEFiGgAABolpAAAYJKYBAGCQmAYAgEFiGgAABi0U01X13XMPAgAAq2bRM9O/VVXXVdV/qqqHzToRAACsiIViurt/IMlPJzkpyfVV9YaqevqskwEAwBa38DXT3f2RJL+S5EVJ/m2SV1bVh6rqJ+YaDgAAtrJFr5n+nqq6KMnNSc5I8mPd/R3T9kUzzgcAAFvWtgWP+19JXpPkJd39D3cvdvcnqupXZpkMAAC2uEVj+swk/9DddyVJVX1dkqO7+0vd/buzTQcAAFvYotdMvyvJMev2HzKtAQDAA9aiMX10d3/x
7p1p+yHzjAQAAKth0Zj++6o67e6dqnpikn84xPEAAHC/t+g10y9M8gdV9YkkleRfJfkPcw0FAACrYKGY7u6/rKpTkjx2Wvpwd//TfGMBAMDWt+iZ6SR5UpKd03NOq6p09+tnmQoAAFbAQjFdVb+b5NuS3JDkrmm5k4hpAAAesBY9M70ryeO6u+ccBgAAVsmid/O4MWu/dAgAAEwWPTP9qCQfrKrrktxx92J3P3uWqQCAw7Jz95XLHmHT3XLhmcsegQegRWP6ZXMOAQAAq2jRW+P9aVV9S5KTu/tdVfWQJEfNOxoAAGxtC10zXVU/n+TyJL89LZ2Y5I9mmgkAAFbCor+AeEGSpyT5QpJ090eSfNNcQwEAwCpYNKbv6O4v371TVduydp/pe1VVJ1XVe6rqg1V1U1W9YFp/RFVdXVUfmb4/fFqvqnplVe2tqvdX1WmjfygAANgMi8b0n1bVS5IcU1VPT/IHSf7PfTznziS/1N2PS/LkJBdU1eOS7E5yTXefnOSaaT9Jnpnk5Onr/CSv2tCfBAAANtmiMb07yYEkH0jy/CRXJfmVQz2hu2/r7vdO23+X5OasXWt9VpJLp8MuTXL2tH1Wktf3mr9IclxVnbD4HwUAADbXonfz+EqSV09fG1ZVO5M8Icm1SY7v7tumhz6Z5Php+8Qkt6572r5p7bZ1a6mq87N25jqPfvSjR8YBAIAjYqGYrqqP5SDXSHf3ty7w3G9I8pYkL+zuL1TV+ud3VW3oI8q7++IkFyfJrl27fLw5AABLs+iHtuxat310kp9K8oj7elJVPShrIf373f2H0/KnquqE7r5tuozj9ml9f5KT1j19x7QGAABb0kLXTHf3p9d97e/u30hyyM/srLVT0Jckubm7f33dQ1ckOXfaPjfJ29at/9x0V48nJ/n8ustBAABgy1n0Mo/1t6n7uqydqb6v5z4lyc8m+UBV3TCtvSTJhUkuq6rzknw8yXOmx65K8qwke5N8KcnzFpkNAACWZdHLPH5t3fadSW7JVyP4oLr7z5PUvTz8tIMc31n7cBgAAFgJi97N44fmHgQAAFbNopd5/OKhHr/HNdEAAPCAsJG7eTwpa78kmCQ/luS6JB+ZYygAAFgFi8b0jiSnTZ9kmKp6WZIru/tn5hoMAAC2ukU/Tvz4JF9et//lfPWTCwEA4AFp0TPTr09yXVW9ddo/O8mls0wEAAArYtG7eby8qv44yQ9MS8/r7vfNNxYAAGx9i17mkSQPSfKF7v7NJPuq6jEzzQQAACthoZiuqpcmeVGSF09LD0rye3MNBQAAq2DRM9M/nuTZSf4+Sbr7E0keOtdQAACwChaN6S9PH/fdSVJVx843EgAArIZFY/qyqvrtJMdV1c8neVeSV883FgAAbH33eTePqqokb05ySpIvJHlskv/e3VfPPBsAAGxp9xnT3d1VdVV3f3cSAQ0AAJNFL/N4b1U9adZJAABgxSz6CYjfm+RnquqWrN3Ro7J20vp75hoMAAC2ukPGdFU9urv/JsmPbNI8AACwMu7rzPQfJTmtuz9eVW/p7n+/CTMBAMBKuK9rpmvd9rfOOQgAAKya+4rpvpdtAAB4wLuvyzweX1VfyNoZ6mOm7eSrv4D4jbNOBwAAW9ghY7q7j9qsQQAAYNUsep9pAADgHsQ0AAAMEtMAADBITAMAwCAxDQAAg8Q0AAAMEtMAADBITAMAwCAxDQAAg8Q0AAAMEtMAADBITAMAwCAxDQAAg8Q0AAAMEtMAADBITAMAwCAxDQAAg8Q0AAAMEtMAADBITAMAwCAxDQAAg8Q0AAAMEtMAADBotpiuqtdW1e1VdeO6tZdV1f6qumH6eta6x15cVXur6sNV9SNzzQUAAEfKnGemX5fkGQdZv6i7T52+rkqSqnpcknOSfOf0nN+qqqNmnA0AAA7bbDHd3X+W5DMLHn5Wkjd19x3d/bEke5OcPtdsAABwJCzjmulfqKr3T5eBPHxaOzHJreuO2TetAQDAlrXZ
Mf2qJN+W5NQktyX5tY2+QFWdX1V7qmrPgQMHjvB4AACwuE2N6e7+VHff1d1fSfLqfPVSjv1JTlp36I5p7WCvcXF37+ruXdu3b593YAAAOIRNjemqOmHd7o8nuftOH1ckOaeqHlxVj0lycpLrNnM2AADYqG1zvXBVvTHJU5M8qqr2JXlpkqdW1alJOsktSZ6fJN19U1VdluSDSe5MckF33zXXbAAAcCTMFtPd/dyDLF9yiONfnuTlc80DAABHmk9ABACAQbOdmQYA2Ew7d1+57BE21S0XnrnsEYgz0wAAMExMAwDAIDENAACDxDQAAAwS0wAAMEhMAwDAIDENAACDxDQAAAwS0wAAMEhMAwDAIDENAACDxDQAAAwS0wAAMEhMAwDAIDENAACDxDQAAAwS0wAAMEhMAwDAIDENAACDxDQAAAwS0wAAMEhMAwDAIDENAACDxDQAAAwS0wAAMEhMAwDAIDENAACDxDQAAAwS0wAAMEhMAwDAIDENAACDxDQAAAwS0wAAMEhMAwDAIDENAACDxDQAAAwS0wAAMEhMAwDAIDENAACDxDQAAAwS0wAAMEhMAwDAIDENAACDZovpqnptVd1eVTeuW3tEVV1dVR+Zvj98Wq+qemVV7a2q91fVaXPNBQAAR8qcZ6Zfl+QZ91jbneSa7j45yTXTfpI8M8nJ09f5SV4141wAAHBEzBbT3f1nST5zj+Wzklw6bV+a5Ox166/vNX+R5LiqOmGu2QAA4EjY7Gumj+/u26btTyY5fto+Mcmt647bN60BAMCWtbRfQOzuTtIbfV5VnV9Ve6pqz4EDB2aYDAAAFrPZMf2puy/fmL7fPq3vT3LSuuN2TGtfo7sv7u5d3b1r+/btsw4LAACHstkxfUWSc6ftc5O8bd36z0139Xhyks+vuxwEAAC2pG1zvXBVvTHJU5M8qqr2JXlpkguTXFZV5yX5eJLnTIdfleRZSfYm+VKS5801FwAAHCmzxXR3P/deHnraQY7tJBfMNQsAAMzBJyACAMAgMQ0AAIPENAAADBLTAAAwSEwDAMAgMQ0AAIPENAAADBLTAAAwSEwDAMAgMQ0AAIPENAAADBLTAAAwSEwDAMAgMQ0AAIPENAAADBLTAAAwSEwDAMAgMQ0AAIPENAAADBLTAAAwSEwDAMAgMQ0AAIPENAAADBLTAAAwSEwDAMAgMQ0AAIPENAAADBLTAAAwSEwDAMAgMQ0AAIPENAAADBLTAAAwSEwDAMAgMQ0AAIPENAAADBLTAAAwSEwDAMAgMQ0AAIPENAAADBLTAAAwSEwDAMAgMQ0AAIPENAAADBLTAAAwSEwDAMAgMQ0AAIO2LeM/WlW3JPm7JHclubO7d1XVI5K8OcnOJLckeU53f3YZ8wEAwCKWeWb6h7r71O7eNe3vTnJNd5+c5JppHwAAtqytdJnHWUkunbYvTXL28kYBAID7tqyY7iTvrKrrq+r8ae347r5t2v5kkuMP9sSqOr+q9lTVngMHDmzGrAAAcFBLuWY6yfd39/6q+qYkV1fVh9Y/2N1dVX2wJ3b3xUkuTpJdu3Yd9BgAANgMSzkz3d37p++3J3lrktOTfKqqTkiS6fvty5gNAAAWtekxXVXHVtVD795O8sNJbkxyRZJzp8POTfK2zZ4NAAA2YhmXeRyf5K1Vdfd//w3d/Y6q+sskl1XVeUk+nuQ5S5gNAAAWtukx3d0fTfL4g6x/OsnTNnseAAAYtZVujQcAACtFTAMAwCAxDQAAg8Q0AAAMEtMAADBITAMAwCAxDQAAg8Q0AAAMEtMAADBITAMAwCAxDQAAg8Q0AAAMEtMAADBITAMAwCAxDQAAg8Q0AAAMEtMAADBITAMAwCAxDQAAg8Q0AAAMEtMAADBITAMAwCAxDQAAg8Q0AAAM2rbsAQAA2Lidu69c9gib7pYLz1z2CF/DmWkAABgkpgEAYJCYBgCAQWIaAAAGiWkAABgkpgEAYJCYBgCAQWIaAAAGiWkAABgkpgEA
YJCYBgCAQWIaAAAGiWkAABgkpgEAYJCYBgCAQWIaAAAGbVv2AKyOnbuvXPYIAABbijPTAAAwSEwDAMAgl3kMcskDAABb7sx0VT2jqj5cVXuravey5wEAgHuzpWK6qo5K8r+TPDPJ45I8t6oet9ypAADg4LZUTCc5Pcne7v5od385yZuSnLXkmQAA4KC2WkyfmOTWdfv7pjUAANhyVu4XEKvq/CTnT7tfrKoPL3OeFfCoJH+77CFWjPds47xnG+c9G+N92zjv2cZ5zzZuU96zesXc/4VD+paDLW61mN6f5KR1+zumtX/W3RcnuXgzh1plVbWnu3cte45V4j3bOO/ZxnnPxnjfNs57tnHes417IL9nW+0yj79McnJVPaaqvj7JOUmuWPJMAABwUFvqzHR331lVv5Dk/yY5Kslru/umJY8FAAAHtaViOkm6+6okVy17jvsRl8RsnPds47xnG+c9G+N92zjv2cZ5zzbuAfueVXcvewYAAFhJW+2aaQAAWBli+n6oqo6uquuq6q+q6qaq+tVlz7QqquqoqnpfVb192bOsiqq6pao+UFU3VNWeZc+zCqrquKq6vKo+VFU3V9X3LXumrayqHjv9/br76wtV9cJlz7XVVdV/nf4fcGNVvbGqjl72TKugql4wvWc3+Xt2cFX12qq6vapuXLf2iKq6uqo+Mn1/+DJn3Exi+v7pjiRndPfjk5ya5BlV9eTljrQyXpDk5mUPsYJ+qLtPfaDeFmnAbyZ5R3efkuTx8XfukLr7w9Pfr1OTPDHJl5K8dblTbW1VdWKS/5JkV3d/V9Z+qf+c5U619VXVdyX5+ax9IvPjk/xoVX37cqfakl6X5Bn3WNud5JruPjnJNdP+A4KYvh/qNV+cdh80fbk4/j5U1Y4kZyZ5zbJn4f6rqh6W5AeTXJIk3f3l7v7cUodaLU9L8tfd/fFlD7ICtiU5pqq2JXlIkk8seZ5V8B1Jru3uL3X3nUn+NMlPLHmmLae7/yzJZ+6xfFaSS6ftS5OcvZkzLZOYvp+aLle4IcntSa7u7muXPNIq+I0kv5zkK0ueY9V0kndW1fXTJ5RyaI9JciDJ70yXFL2mqo5d9lAr5Jwkb1z2EFtdd+9P8j+T/E2S25J8vrvfudypVsKNSX6gqh5ZVQ9J8qz8yw+T494d3923TdufTHL8MofZTGL6fqq775p+JLojyenTj664F1X1o0lu7+7rlz3LCvr+7j4tyTOTXFBVP7jsgba4bUlOS/Kq7n5Ckr/PA+jHoYdj+jCvZyf5g2XPstVN16uelbV/vH1zkmOr6meWO9XW1903J3lFkncmeUeSG5LctcyZVlGv3SruAfMTcTF9Pzf9+Pg9+dprm/iXnpLk2VV1S5I3JTmjqn5vuSOthukMWLr79qxdx3r6cifa8vYl2bfup0WXZy2uuW/PTPLe7v7UsgdZAf8uyce6+0B3/1OSP0zyb5Y800ro7ku6+4nd/YNJPpvk/y17phXxqao6IUmm77cveZ5NI6bvh6pqe1UdN20fk+TpST601KG2uO5+cXfv6O6dWfsx8ru721mc+1BVx1bVQ+/eTvLDWfsxKfeiuz+Z5Naqeuy09LQkH1ziSKvkuXGJx6L+JsmTq+ohVVVZ+3vmF10XUFXfNH1/dNaul37DcidaGVckOXfaPjfJ25Y4y6bacp+AyBFxQpJLq+qorP2D6bLudqs35nB8kreu/b8625K8obvfsdyRVsJ/TvL702ULH03yvCXPs+VN/1h7epLnL3uWVdDd11bV5Unem+TOJO/LA/gT6jboLVX1yCT/lOQCvyD8tarqjUmemuRRVbUvyUuTXJjksqo6L8nHkzxneRNuLp+ACAAAg1zmAQAAg8Q0AAAMEtMAADBITAMAwCAxDQAAg8Q0AAAMEtMAADBITAMAwKD/D4X5+Gw5h3DMAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 864x576 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "dataFeature['pIC50'].plot(kind='hist', figsize=(12,8))\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "01043608",
   "metadata": {},
   "outputs": [],
   "source": [
    "X = dataFeature[dataFeature.columns[1:]].values\n",
    "y = dataFeature['pIC50'].values"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f7bb9465",
   "metadata": {},
   "source": [
    "#### 数据划分，特征归一化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "451c08ee",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "aa6618a3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1579, 729) (395, 729)\n"
     ]
    }
   ],
   "source": [
    "print(X_train.shape, X_test.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "33c1838c",
   "metadata": {},
   "source": [
    "### 回归模型"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f39fb4f8",
   "metadata": {},
   "source": [
    "#### 1.Lasso \n",
    "- 网格搜索：获取最优超参数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "254240a0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Lasso(alpha=0.01, max_iter=5000)"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.linear_model import Lasso\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.metrics import mean_squared_error\n",
    "\n",
    "X_train_new = StandardScaler().fit_transform(X_train)    # 树模型不需要归一化\n",
    "\n",
    "params = {'alpha':[0.001, 0.01,0.1,1.0,10.0],'max_iter':[100,1000,5000,10000]}\n",
    "\n",
    "grid_search = GridSearchCV(Lasso(),params,cv=10, n_jobs=-1, scoring='neg_mean_squared_error')\n",
    "\n",
    "grid_search.fit(X_train_new, y_train)\n",
    "\n",
    "grid_search.best_estimator_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "56b5354d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "rmse scores :  [0.5596235544262801, 0.6500376252974729, 0.6533166006159559, 0.754178974083143, 0.6857202079700503, 0.6445496779492916, 0.7006022955577618, 1.2272039161352652, 0.9179893348217645, 1.0548680620799342]\n",
      "average rmse scores : 0.7848090248936919\n"
     ]
    }
   ],
   "source": [
    "from sklearn.model_selection import KFold\n",
    "from sklearn.metrics import mean_absolute_error\n",
    "\n",
    "# K折交叉验证\n",
    "kf = KFold(n_splits=10)\n",
    "\n",
    "rmse_scores = [] \n",
    "\n",
    "for train_indices, test_indices in kf.split(X):\n",
    "    X_new = StandardScaler().fit_transform(X)\n",
    "    \n",
    "    X_train_new, X_test_new = X_new[train_indices], X_new[test_indices]\n",
    "    y_train, y_test = y[train_indices], y[test_indices]\n",
    "\n",
    "    lasso = grid_search.best_estimator_\n",
    "\n",
    "    lasso.fit(X_train_new, y_train)\n",
    "\n",
    "    y_pred = lasso.predict(X_test_new)\n",
    "\n",
    "    rmse = mean_absolute_error(y_test, y_pred)\n",
    "\n",
    "    rmse_scores.append(rmse)\n",
    "\n",
    "\n",
    "print(\"rmse scores : \", rmse_scores)\n",
    "print(f'average rmse scores : {np.mean(rmse_scores)}')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4e834983",
   "metadata": {},
   "source": [
    "rmse值较大，且特征重要度不易排序，在此暂不考虑`Lasso`模型的特征排序"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a79d981d",
   "metadata": {},
   "source": [
    "#### 2. RandomForest\n",
    "- 网格搜索：获取最优超参数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "73c963b9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RandomForestRegressor(max_features=8, n_estimators=30)"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.ensemble import RandomForestRegressor\n",
    "\n",
    "param_grid = [{'n_estimators': [3, 10, 30], 'max_features': [2, 4, 6, 8]},\n",
    "              {'bootstrap': [False], 'n_estimators': [3, 10], 'max_features': [2, 3, 4]}]\n",
    "\n",
    "\n",
    "grid_search = GridSearchCV(RandomForestRegressor(), param_grid, cv=10, n_jobs=-1,scoring='neg_mean_squared_error')\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True)\n",
    "\n",
    "grid_search.fit(X_train,y_train)\n",
    "\n",
    "grid_search.best_estimator_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "7103ae04",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "rmse scores :  [0.6055992917550804, 0.7232752772911965, 0.6544468415341502, 0.7179032846914167, 0.7203445468036371, 0.6192555787700924, 0.6600007575173633, 0.9856119214680825, 0.8967782042920874, 1.042537451908988]\n",
      "average rmse scores : 0.7625753156032095\n"
     ]
    }
   ],
   "source": [
    "# K折交叉验证\n",
    "kf = KFold(n_splits=10)\n",
    "\n",
    "rmse_scores = [] \n",
    "\n",
    "for train_indices, test_indices in kf.split(X):\n",
    "    X_train, X_test = X[train_indices], X[test_indices]\n",
    "    y_train, y_test = y[train_indices], y[test_indices]\n",
    "\n",
    "    RFR = grid_search.best_estimator_\n",
    "\n",
    "    RFR.fit(X_train, y_train)\n",
    "\n",
    "    y_pred = RFR.predict(X_test)\n",
    "\n",
    "    rmse = mean_absolute_error(y_test, y_pred)\n",
    "\n",
    "    rmse_scores.append(rmse)\n",
    "\n",
    "print(\"rmse scores : \", rmse_scores)\n",
    "print(f'average rmse scores : {np.mean(rmse_scores)}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "5602bbeb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([238,   5, 105, 476, 504,   4,  35,  56, 529, 350, 104, 706, 638,\n",
       "       109,  34, 658, 727, 584,  39, 653])"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "importances = RFR.feature_importances_\n",
    "RF_indices = np.argsort(importances)[::-1]  # 逆序\n",
    "RF_indices[:20]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fe854d42",
   "metadata": {},
   "source": [
    "#### 3. xgboost"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "1b2680c9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,\n",
       "             colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,\n",
       "             importance_type='gain', interaction_constraints='',\n",
       "             learning_rate=0.300000012, max_delta_step=0, max_depth=2,\n",
       "             min_child_weight=1, missing=nan, monotone_constraints='()',\n",
       "             n_estimators=200, n_jobs=12, num_parallel_tree=1, random_state=0,\n",
       "             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,\n",
       "             tree_method='exact', validate_parameters=1, verbosity=None)"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import xgboost as xgb\n",
    "\n",
    "param_dist = {\n",
    "        'n_estimators' : np.arange(100, 500, 50),\n",
    "        'max_depth' : np.arange(2, 10, 2)\n",
    "        }\n",
    "\n",
    "\n",
    "grid_search = GridSearchCV(xgb.XGBRegressor(), param_dist, cv=5, n_jobs=-1,scoring='neg_mean_squared_error')\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True)\n",
    "\n",
    "grid_search.fit(X_train,y_train)\n",
    "\n",
    "grid_search.best_estimator_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "847493c1",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "rmse scores :  [0.6312620625570345, 0.6010116352037546, 0.6354705698580512, 0.7947763055335008, 0.7941632651089343, 0.5498306202509072, 0.5472986787145724, 1.1231572999082142, 0.9029916779842503, 1.1535983583227898]\n",
      "average rmse scores : 0.7733560473442009\n"
     ]
    }
   ],
   "source": [
    "# K折交叉验证\n",
    "kf = KFold(n_splits=10)\n",
    "\n",
    "rmse_scores = [] \n",
    "\n",
    "for train_indices, test_indices in kf.split(X):\n",
    "    X_train, X_test = X[train_indices], X[test_indices]\n",
    "    y_train, y_test = y[train_indices], y[test_indices]\n",
    "\n",
    "    XGBR = grid_search.best_estimator_\n",
    "\n",
    "    XGBR.fit(X_train, y_train)\n",
    "\n",
    "    y_pred = XGBR.predict(X_test)\n",
    "\n",
    "    rmse = mean_absolute_error(y_test, y_pred)\n",
    "\n",
    "    rmse_scores.append(rmse)\n",
    "\n",
    "print(\"rmse scores : \", rmse_scores)\n",
    "print(f'average rmse scores : {np.mean(rmse_scores)}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "3e3ba618",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([659, 476, 231,  56,  58, 587, 639, 166, 716, 406, 361, 353, 357,\n",
       "       664, 629, 387,  22,  42, 535, 673])"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "feature_importance = XGBR.feature_importances_\n",
    "xgb_indices = np.argsort(feature_importance)[::-1]\n",
    "xgb_indices[:20]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "60d0dfab",
   "metadata": {},
   "source": [
    "#### 4. lightgbm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "c45a67f9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 5 folds for each of 50 candidates, totalling 250 fits\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "LGBMRegressor(learning_rate=0.2, max_depth=4, min_child_samples=150,\n",
       "              n_estimators=250, num_leaves=18, random_state=666)"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import lightgbm as lgb\n",
    "from sklearn.model_selection import RandomizedSearchCV\n",
    "\n",
    "# 默认参数\n",
    "fit_params = {\n",
    "    'early_stopping_rounds' : 30,\n",
    "    'eval_metric' : 'regressor',\n",
    "    'eval_set' : [(X_test, y_test)],\n",
    "    'eval_names' : ['valid'],\n",
    "    'verbose' : 0,\n",
    "    'categorical_feature' : 'auto'\n",
    "}\n",
    "\n",
    "# 搜索超参数\n",
    "params_test = {\n",
    "    'learning_rate' : [0.01, 0.02, 0.03, 0.05, 0.07, 0.1, 0.2, 0.3, 0.4], # 学习率\n",
    "    'n_estimators' : np.arange(100, 500, 50), # 树模型\n",
    "    'num_leaves' : np.arange(10, 50), # 叶子数\n",
    "    'min_child_samples' : np.arange(100, 500, 50), # 最小子节点样本数\n",
    "    'max_depth' : np.arange(1, 10), # 树的深度\n",
    "}\n",
    "\n",
    "n_iter = 50 # 循环50轮\n",
    "\n",
    "# 初始化模型\n",
    "model = lgb.LGBMRegressor(random_state=666, n_jobs=-1)\n",
    "\n",
    "# 网格搜索\n",
    "grid_search = RandomizedSearchCV(estimator=model,\n",
    "                                 param_distributions = params_test,\n",
    "                                 n_iter = n_iter,\n",
    "                                 scoring = 'neg_mean_squared_error',\n",
    "                                 random_state = 666,\n",
    "                                 verbose = True,\n",
    "                                 n_jobs = -1)\n",
    "\n",
    "\n",
    "# 训练\n",
    "grid_search.fit(X_train, y_train, **fit_params)\n",
    "\n",
    "grid_search.best_estimator_\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "b05da7fe",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "rmse scores :  [0.5963797830397128, 0.630986414292441, 0.5819503635432313, 0.6628574246482151, 0.8023429746690468, 0.6071009811543012, 0.46722961240144767, 0.9967398237322119, 0.8979305953780745, 1.1341992228905795]\n",
      "average rmse scores : 0.7377717195749262\n"
     ]
    }
   ],
   "source": [
    "# K折交叉验证\n",
    "kf = KFold(n_splits=10)\n",
    "\n",
    "rmse_scores = [] \n",
    "\n",
    "\n",
    "for train_indices, test_indices in kf.split(X):\n",
    "    X_train, X_test = X[train_indices], X[test_indices]\n",
    "    y_train, y_test = y[train_indices], y[test_indices]\n",
    "\n",
    "    LGBR = grid_search.best_estimator_\n",
    "\n",
    "    LGBR.fit(X_train, y_train)\n",
    "\n",
    "    y_pred = LGBR.predict(X_test)\n",
    "\n",
    "    rmse = mean_absolute_error(y_test, y_pred)\n",
    "\n",
    "    rmse_scores.append(rmse)\n",
    "\n",
    "print(\"rmse scores : \", rmse_scores)\n",
    "print(f'average rmse scores : {np.mean(rmse_scores)}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "2e6646e9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([637, 103, 673, 661, 344, 606, 371,  26,  25,  24, 727, 586, 383,\n",
       "        75, 604, 587, 616, 655,  30,  83])"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "feature_importance = LGBR.feature_importances_\n",
    "lgb_indices = np.argsort(feature_importance)[::-1]\n",
    "lgb_indices[:20]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "09d65c34",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Figure size 864x576 with 0 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAbAAAAEWCAYAAAAHC8LZAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAA/I0lEQVR4nO3de5xVZd3//9cbEDygIMGQAkJqSAo4nvHOCC0x0zykCdyV4tjdbWV1e8DD1zQ0swJRS393Jiqe8mwokseEUW9PKTYCmqDiGBioqKDgCeTz++O6Btbs2XvPZvZ55vN8PPZj1l5rXWt99naYy7XWtd5LZoZzzjlXbTqVuwDnnHOuLbwDc845V5W8A3POOVeVvANzzjlXlbwDc845V5W8A3POOVeVvANzzjlXlbwDc67CSWqU9JGkVYnXtnlu7+uFrNG5cvAOzLnq8C0z6554/btchUjqUq59O5fkHZhzVUhSD0lXS1oq6Q1JF0jqHJftIGmWpHckLZf0Z0k947IbgO2Ae+KR3OmSRklakrL99UdpkiZKukPSjZLeB8a3sv8dJT0iaWXc/62l/G5cx+EdmHPV6VpgLbAjsBswGvhBXCbgN8C2wJeAAcBEADP7PvAvNhzRTcpxf4cDdwA9gT+3sv9fAQ8CWwP9gcva8gGda42fCnCuOtwlaW2cfhI4AOhpZh8BqyVdAvwQ+JOZvQK8Etd9W9LFwC/z3P+TZnYXgKStgG9m2j+wBhgIbGtmS4D/y3PfzqXlHZhz1eEIM/sbgKS9gYOApZKalncCFsflfYHfA18BtozL3stz/4sT0wOBTTLtHzidcBT2d0nvAVPM7Jo89+9cC96BOVd9FgOfAL3NbG2a5RcCBgwzs3clHQFcnlie+giK1cDmTW/itaw+Kesk22Tdv5ktA/4rbms/4G+SHo1Hhs4VjF8Dc67KmNlSwjWmKZK2ktQpDtz4alxlS2AVsFJSP2BCyibeBLZPvF8IbCrpEEmbAL8AurV1/5K+I6l/XP09Que3Lq8P7Vwa3oE5V52OBboCLxI6iTuAbeKy84DdgZXAX4G/pLT9DfALSSsknWZmK4EfA1cBbxCOyJaQXbb97wU8LWkVMAP4uZktauPndC4j+QMtnXPOVSM/AnPOOVeVvANzzjlXlbwDc845V5W8A3POOVeV/D6wEunZs6ftuOOO5S4jo9WrV7PFFluUu4ysKr1Gry8/Xl9+2mt9c+bMWW5mqfclBmbmrxK8Bg8ebJVs9uzZ5S6hVZVeo9eXH68vP+21PuBZy/B31U8hOuecq0regTnnnKtK3oE555yrSt6BOeecq0regTnnnKtK3oE555wribq6Ompqahg6dOj6ec8//zz77rsvw4YN41vf+hbvv/9+ztsragcm6fOSbpH0qqQ5ku6VNDjDuoMkzS9mPZlI6irpSkkLJb0k6ag4/0RJ8yQ1SPo/STvH+d+N85pe6yTVlqN255yrFuPHj+f+++9vNu8HP/gBv/3tb5k3bx5HHnkkkydPznl7RevAFB7VOh2oN7MdzGwP4Cygb7H2mYezgbfMbDCwM/BInH+TmQ0zs1pgEnAxgJn92cxq4/zvA6+ZWUPJq3bOuSoycuRIevXq1WzewoULGTlyJAAHHnggd955Z87bK2YSx/7AGjO7ommGmT2vYDJwMOFBdxeY2a3JhpLGA3ua2Unx/UzgIjOrj88Y+iPwTWAp8P8Inct2wP+Y2YzY/jDCU2Z3AKab2elZaq0DhsQa1wHL43TyWHYLWj7JFmAccEtrX8ZHaz5j0Jl/bW21sjl12FrGV3B9UPk1en358fryU+z6Gn97SFG2u8suu3D33XdzxBFHcPvtt7N48eKc2xazAxsKzEkz/9tALbAr0Bt4RtKjG7HdLYBZZjZB0nTgAuBAwpHTdYQH6BH3sRvh0ecLJF1mZi2+GUk94+SvJI0CXgVOMrM34/KfAKcQHt53QJp6xgCHpytU0g+B
HwL07t2Hc4ele/p7Zei7WfgHUMkqvUavLz9eX36KXV99fX1e7VetWkV9fT3Lli1j9erV67d34okn8utf/5rTTz+dL3/5y3Tq1Cn3fWWK6Mj3BfwMuCTN/EuAusT7GwhHS4OA+XHeeODyxDozgVFx+hM2PIjzfODsON0JWJFoPzXR/j5gvwx19iYcWR0d358C3JBmvf8ErkuZtw8wL5fvw6Ok8lfpNXp9+fH68lMt9b322mu2yy67pF1nwYIFttdeezWbR5mipF4A9mhj27U0vz63aWJ6TfxQAOsIHVrTqb/kEeUnienPyHy0+Q7wIRseu3474XHsqW4BjkiZNxa4OcN2nXPOteKtt94CYN26dVxwwQWceOKJObctZgc2C+gWT6MBIGk4sAIYI6mzpD7ASODvKW0bgVpJnSQNAPYuVpGxM7wHGBVnfQ14Mdb7xcSqhwAvN72R1Ak4hhyufznnnINx48ax7777smDBAvr378/VV1/NzTffzODBgxkyZAjbbrstxx9/fM7bK9o1MDMzSUcCl0o6A/iY0DH9D9AdeJ5w6u50M1smaVCi+ePAa4SO5J/Ac8WqMzoDuEHSpcDbQNM3eJKkrwNrgPeA4xJtRgKLzWxRkWtzzrl24eab05+w+vnPf96m7RX1eWBm9m/CUUqqCfGVXLeRMPCj6ajouxm22T0xPTHdMjO7Frg2Mf/QVup8ndAhpc7P+K2aWT0wItt2nXPOFY8ncTjnnMtJuiSNhoYGRowYQW1tLXvuuSd//3vqFaHi6VAdmKSnUxI0GiQNK3ddzjlXDdIlaZx++un88pe/pKGhgfPPP5/TT892y21hdagoKTPbx2KCRuI1L0uU1CWJjm6hpBVx/kBJz8X5L0jKfdiMc85VqXRJGpLW5xeuXLmSbbfdtmT1aMOI9AJvOERJPUG4d+qKOG9XYCszeyzN+oOAmWY2NHVZsUk6D+hsZr+Iowt7mdnylHV+CuxmZnWSuhK+u08kdQfmA/8Rr/mltd32O1qnY35fzI+Rl1OHrWXKvKJeEs1bpdfo9eXH68tPa/UVKkmjsbGRQw89lPnzw/HGP//5Tw466CDMjHXr1vHEE08wcODAFu3q6+sZNWrURu9P0hwz2zPtsiJ2YAcAE81sZMp8EaKfmkVJJTuwUkdJSVoMDDGz1VnWeQL4pZk9lDL/c8A/gBGpHVhKEsce5146NeP3VW59N4M3Pyp3FdlVeo1eX368vvy0Vt+wfj0Ksp9ly5Zx1llnMW3aNAD+8Ic/sOuuu/LVr36V2bNnM3PmTKZMmdKi3apVq+jevXuL+a3Zf//9M3Zg5UjiOAp4COhMCPb9F7ANuSdxGHBwnJ4OPAhsQoimaki0XwT0INwE/TowIEOdPYHFhKDe5wg3MvdNWWcgobPsnJg3AJhLuAn6J619H57Ekb9Kr9Hry4/Xl59S1ZeapLHVVlvZunXrzMxs3bp1tuWWW6Zt19b6KFMSRyb7ATeb2WcW8gYfAfbaiPafAk1XEecBj5jZmjg9KLHew2a20sw+JtxP1vKYNugC9AeeMLPdgSeBi1LWGQvcYWafNc0ws8VmNhzYEThOUiWm7DvnXFFtu+22PPJIeIDHrFmz+OIXv9hKi8Ip5gndF4Cj29i2TVFSkgoVJXVCyjpjgZ+ka2xm/46DT74C3JFhH845V/XGjRtHfX09y5cvp3///px33nlMnTqVn//856xdu5ZNN92UK6+8smT1FLMDmwVcKOmHZnYltIiSug7oRbiBeALNO6lG4MdxQEU/ihwlJakpSmoWiSipWPMQYGvCkVnTvP7AO2b2kaStCUeVlxSrRuecqwSZkjTmzEn34JHi8yipIFOUFISjr1sSR30AXwKmSDJAhAEm84pco3POuQSPkiJzlFS6fcR5DwHDs23TOefam7q6OmbOnElNTc36YfQNDQ2ceOKJfPzxx3Tp0oX//d//Ze+9i3bSrJkOlcThnHOu7TyJo4xJHJmipCT9WtLieI9Z
cv1ukm6V9EpsOyjO3zvR/vl4qtQ559o1T+KozCSOEYR7xV5OnqKU9GNguJmdKGkscKSZjZG0OfCpma2VtA3het62Zpbxed6exJG/Sq/R68uP15cfT+IooGpK4kjUtiqlA3sgfoYn4xD9ZUCf5IAOSV8AngL6pXZgnsRRWJVeo9eXH68vP57E0QGTOFJqW5Xyfj7QP/H+VaB3nN6HcK/bKsKRmSdxFFml1+j15cfry48ncZRGpSVxtImZPW1muxBqP0vSpq21cc659saTOFoqZRJHNm8QMg+XxG33ICR3rGdm/4ynNYcCz7ZhH845VxU8iaPCkjhaMQM4jpDCcTQwy8wsXvdabGEQx0BgSKzZOefaLU/iqMAkDkmTgP8ENpe0BLjKwg3MVxMSOl4B3iWkckA4DXqmpDWEo8AfW8rzw5xzzhWXJ3GE5acDLUYpxutn30kz/wbghmzbdM45V1yexOGccx1EXV0dNTU1DB264XbbMWPGUFtbS21tLYMGDaK2trZ8BW6kyr0rrwgkPQ10S5n9ffMgXudcBzB+/HhOOukkjj322PXzbr311vXTp556Kj16FOZ+sVLoUFFSZraPmdWmvOZJ6irpSkkLJb0k6ahEXcdIelHSC5JuSsy/X9KKeJO1c85VvHRRUE3MjNtuu41x48aVuKq2K9oRWEzcmE6Ikhob5+1KuHl5YbH220ZnA2+Z2eA48rEXgKQvAmcBXzaz9yTVJNpMJiR9/HcuO/hozWcMOvOvBS67cE4dtpbxFVwfVH6NXl9+vL7sChUFlcljjz1G3759S3ofV748SipsfzEwxMxWp8yfBCw0s6sytBsFnJZpkIhHSRVWpdfo9eXH68uutSioXKOaUqOgmlxyySX069ePY45JN+4ufx4lVYQoKaAnsBi4mDBc/3agb1x2F6FzfJyQd/iNlLajCJ1uq9+HR0nlr9Jr9Pry4/XlJ9f6UqOgzMzWrFljNTU1tnjx4iJUFniUVFDoKKkuQH/gCTPbnXDT8kWJZV8kdFTjgKmSem5Erc45V/H+9re/MWTIEPr371/uUjZKMTuwF4A92ti2TVFSNL+ml2uU1DvAh8Bf4vvbgd3j9BJghpmtMbPXCNfuqucEsXPOJYwbN459992XBQsW0L9/f66++moAbrnllqoavNGkw0dJmZlJuodwlDUL+BrhiA3CKcRxwDRJvYHBhFOTzjlXdTJFQV177bWlLaRAPEoqOIMQGXUp8DZwfJz/ADBa0ouEo7gJZvYOgKTHCBmI3WP81Alm9kCR63TOORd5lFRY/jrhSDB1vgGnxFfqsq9k26ZzzlWauro6Zs6cSU1NzfonKo8ZM4YFCxYAsGLFCnr27ElDQ0MZq8xdh0ricM65jsyTODZCpSVxSHpaUkPKa1hM1Xg+pm1cIalzSrtTJVm8Dpacv5ektZLa+twz55wrGU/iyFElJnGY2T7p5ks6xszejzXfQUigvyUuGwCMJtyvlmzTGfgd4T60VnkSR/4qvUavLz9eX3aexNFSMU8h7k8Y8n5F0wwze17BZFKSOJINS53EYWbvx8kuQNdYV5NLCI9auTul2U+BO8lyD1tKEgfnDlubadWy67tZ+AdaySq9Rq8vP15fdvX19VmXr1q1qtV1ICRxrF69usW6l1xyCXvvvXdO22iLXOvbKJnucM73RZUkcST28QDwHnAT0DnOOxz4fZxuBHrH6X6EG7A7EQaLHN3a9+FJHPmr9Bq9vvx4ffnxJI7SqLQkDgDM7CBCR9oNOEDS5oSju3PTrH4pcIaFm6edc66qeRJHS9WSxLFe7OzuJhx57QB8AXheUiMhbuo5SZ8H9gRuifOPBv5X0hE5fjbnnCsLT+LIXVUkcUjqDmxpZksldQEOAR6z8JDLmsR6jYTrcssJHVvT/GsJgb53FatG55wrBE/iyJFZ1SRxbAHMkNSNcNQ3G7giexPnnHPl1uGTOOJ1uFavwZnZoAzzx7fW1jnnCiFdkgbAZZddxuTJ
k9lyyy055JBDmDRpUhmrLJ1yDOJwzjnXBuPHj+f+++9vNm/27NncfffdXHXVVbzwwgucdtppZaqu9DyJIyRxdJV0paSFkl6SdFRcv5ukWyW9EtsOSmxruKQnY3rHPEmbZtyxc84VQLokjT/+8Y+ceeaZdO3aFYCampp0TdslT+IAJJ0HvGVmg+PAkabfkBOA98xsR0ljCckbY+JgjxuB71u4OftzwJps+/YkjvxVeo1eX37ae33FStJYuHAhjz32GLfddhs1NTVcdNFF7LXXxtyZVL08iSOoIzwapWk4/vI4/3BgYpy+A7g8dsyjgblm9nxs8066jXoSR2FVeo1eX37ae32FSqFITdJYuXIl8+bNY9KkSSxZsoTDDjuMm266ifCnqnIUI4mjmB3YUGBOmvnfBmoJyRm9gWckPboR290CmGVmEyRNBy4ADgR2Bq4DZsT1aoHdCPeDLZB0mZktTt2YpJ5x8leSRgGvAifFwR39gMUAZrZW0krgc4QHW5qkB4A+wC1m1uKqabx94EqA7bbf0abMq9zw/1OHraWS64PKr9Hry097r6/xu6MKUkdjYyNbbLEFo0aF7e2000789Kc/RRI/+tGPuOiiixg6dCh9+vQpyP4Kpb6+fn3NhVKO35b1SRzAm5Kakjjm5tg+NYnjEzNbIyltEgdAfCDlQGJnlKIL4SblJ8zsFEmnABcB389SQ5f4OfYCPgQeljTHzB7O1GCzTTqzoMhhnPmor68v2D+wYqn0Gr2+/Hh9bXPEEUcwe/ZsDjjgABYuXMinn35K7969W2/YDngSB7xD6IT+Et/fDuwep98ABgDE61494vpLgEfNbLmZfQjcm2jjnHNFkS5Jo66ujkWLFnH88cczduxYrrvuuoo7fVgsHT6JI95wfQ8wKtb8NcIN1BBORx4HPEmIjJoV138AOD3mJX4KfJWQWu+cc0WTKUnjxhtvLMopukrnSRzBGcANki4F3gaOj/OvjvNfAd4FxgKY2XuSLgaeiZ/hXjOr3OFTzjnXDnX4JI64/HXCkWDq/I8JD7dM1+ZGwlB655xzZeBJHM45VyXq6uqoqalh6NChzeZfdtllHHvsseyyyy6cfnq2O4balw7VgWVK4ih3Xc45lwuPkmquqKcQ47OzLiUMN18BvEm42bhFEke8BjbTzIamLiuULEkc9xMeZtkFeAz4iZl9JulWYKe4Wk9ghZnVxuSNOwif69qmG66dc66YRo4cSWNjY7N5TVFSXbqEP+ceJVUAlRgllcUxZvZ+rPkOwnWvW8xsTNMKkqYAK+Pbj4FzCNfscupwPUoqf5Veo9eXn/Zen0dJFZ5HSYW63o+TXYCusa5kPSIMRjkgrr8a+D9JO2b7AjxKqrAqvUavLz/tvT6Pkip8lBRmVpQX8DPgkjTzjwIeAjoTjsb+RTh9NwiYH9cZD1yeaDMTGBWnDTg4Tk8HHgQ2IURTNSTaLyLceLwp8DowoJV6HwDeA24COqcsGwk8m6ZNszqzvQYPHmyVbPbs2eUuoVWVXqPXlx+vLzevvfaa7bLLLuvfH3TQQTZr1qz19W2//fb21ltvlam6zNr6/aX729v0KscgjvVRUhbyBpuipHKVGiX1iJmtidODEus9bGYrLQyFb4qSysjMDiJ0pN2IR1oJ44D0dxA651wZNUVJAR4lVUDVEiW1Xuzs7iak0APrI6S+DdyaqZ1zzpWCR0k11+GjpCR1B7Y0s6WxszqEMBKxydeBl8xsSbFqcM65XHiUVHMeJRUezzJDUjfCUd9s4IrE8rGkOX0oqRHYCugq6QhgtJm9mLqec8654ujwUVLxOlzGa3BmNj7D/EGZ2jjnXDp1dXXMnDmTmpoa5s+fD8DEiROZOnXq+ud3XXjhhXzzm98sZ5lVo0MlcTjnXDmlS9IAOPnkk2loaKChocE7r41Qlg5M0ucl3SLpVUlzJN0raXCGdQdJml+g/eYcJSVpc0l/lfSSpBck/TZl+TGSXozLbipEfc65
9m3kyJH06tWr3GW0GyV/InM5EzosQ5RUFheZ2WxJXQlPXT7YzO6T9EXgLODLFh6t0mp2iydx5K/Sa/T68lPp9V37jS2Ktu3LL7+c66+/nj333JMpU6aw9dZbF21f7Yk2jEgv0Q6lA4CJZjYyZb4IiRrNEjqSGYmlTuhIqe/3hButp0qaBCw0s6taaZNM4tjj3Eun5rKrsui7Gbz5UbmryK7Sa/T68lPp9X2hR2e6d+/e+oqtWLZsGWeddRbTpk0D4N1336VHjx5I4pprruGdd97hjDPO2Ojtrlq1qiD1FUtb69t///3nmNmeaRdmusO5WC+qLKEjtusZ220f399F6BwfB54CvtHaNjyJI3+VXqPXl5+OUl9qkkauy1rTXr8/KiyJI5OKTOiI94bdDPzBzBbF2V2ALwKjCCkdUyX13IhanXMOgKVLl66fnj59eotnfbnMSn4NjJDQcXQb27YpoSN2Qk02NqHjSuBlM7s0MW8J8HTsIF+TtJDQoT2T6wdxznU848aNo76+nuXLl9O/f3/OO+886uvraWhoQBKDBg3iT3/6U7nLrBrl6MCqIqEj1nUB4XTjD1IW3UU48pomqTcwmHCK0TnnMkqXpHHCCSeUoZL2oeQdmFl1JHRI6g+cDbwEPBezxS63MHDjAWC0pBcJR3ETzOydYtXinHOupXIcgVVLQscSIG0iZqzjlPhyzrmceBJHYeU0iEPSDjErEEmjJP3MBy0459zG8SSOwsp1FOKdwGfxCcRXAgMID35sk3IlcWSpJ11Cx16ZkjgkjZf0dmLd1GtkzjnXgidxFFauHdg6M1sLHAlcZmYTCPdobbREEke9me1gZnsQUi36tmV7hWBm+5hZbfJFGC15kZkNAXYDvizp4ESzWxPrZ72h2Tnnsrn88ssZPnw4dXV1vPfee+Uup2rkeg1sjaRxwHHAt+K8Tdq4z/0JQ97XP7LEzJ5XMJmUJI5kw1ImcZjZh4RHq2Bmn0p6Dujfxs/sUVIFUOk1en35qfT6ihUl9aMf/YhzzjkHSZxzzjmceuqpXHPNNUXZV3uTawd2PHAi8Gsze03SF4Ab2rjPocCcNPO/DdQSkjN6A89IenQjtrsFMMvMJkiaDlwAHAjsDFwHzIjr1RKOqD4BFki6zMwWZ9twvN73LeD3idlHSRpJyG88Od02UqKkOHfY2o34OKXVd7PwB6SSVXqNXl9+Kr2+VatWUV9fn/d2li1bxurVq9Nua9iwYdx0001t2k+h6iuWotSXKaIj9QVsBuyU6/pZtpMpSuoSoC7x/gbC0dIgcouS+oQN2Y7nA2fH6U7AikT7qYn29wH7tVJvl7je/yTmfQ7oFqf/m9BxepRUkVV6jV5ffjpKfalxUf/+97/XT1988cU2ZsyYNm23vX5/ZImSyukITNK3gIuArsAXJNUC55vZYbl3letVfRKHNb/n6yrCqUrnnMvKkzgKK9dTiBMJqRf1AGbWIGn7Nu6z6pM4JG1jZk0BZocRbqp2zrmsPImjsHIexGFmK2MaRZN1bdmhWbtI4viZpMMIR4TvEk5NOuecK6FcO7AXJP0n0Dk+zPFnwBNt3alVfxLHWYSh/84558ok1/vAfgrsQrh+dBOwknDE5JxzLkd1dXXU1NQ0e2TKxIkT6devH7W1tdTW1nLvvfeWscLq0uoRmKTOwF/NbH/CKbV2R9LTQLeU2d83s3nlqMc51z6NHz+ek046iWOPPbbZ/JNPPpnTTjutTFVVr1aPwMzsM2CdpB6F2mmlRUlZ+iSOVzNFSSVqO0qSSUr/uGvnnEvwKKnCyvUa2CpgnqSHgNVNM83sZxu7w0SU1HVmNjbO25UQJbVwY7dXZBeZ2WxJXYGHJR1sZvcBSNoS+DnwdC4b8iSO/FV6jV5ffiq9vmIlcUCIkrr++uvZc889mTJlCltvvXXR9tWeNN34m30l6bh0883suo3eoXQAMNHMRqbMF+F+qmZRUnEU4kwzG1rKKKk0df+ecEP11Pj+
UuAhwqCT08zs2TRtkkkce5x76dTcvqQy6LsZvPlRuavIrtJr9PryU+n1faFHZ7p37976iq1YtmwZZ511FtOmTQPg3XffpUePHkjimmuu4Z133uGMM87Y6O2uWrWqIPUVS1vr23///eeYWfqzXJnucC7Wi8xJHEcROoTOhKOxfxECgweRWxKHAQfH6enAg4S8xl2BhkT7RYR7uzYFXgcG5FBzz9hu+/h+d+DOOF1P6FQ9iaPIKr1Gry8/HaW+1CSOXJe1pr1+fxQgieO12EGkdn5tvZk5nf2Amy1cc3tT0iPAXsDcHNt/CjQ9aGce8ImZrZE0j9AJNnnYzFYCxCcqDwQyZiHGFI+bgT+Y2aJ4E/XF+L1fzrkCWLp0KdtsEx7uMX369GYjFF12uV4DSx6+bQp8h5CW0RbVHiW1JeG+tPp4c/PngRmSDrM0pxGdc66JR0kVVk4dmDXP/oOQojEHOLcN+6zqKKl49NY7sU49Ga6BOedckkdJFVaupxB3T7ztRDgiy/XorRmzdhEl5Zxzrsxy7YSmJKbXEjqRdFFQObEqj5JKWW9Ua+s459qHuro6Zs6cSU1NDfPnN789dcqUKZx22mm8/fbb9O7dO8MWXCHlGiV1gpntH18HmtkPCYMmnHOuwxg/fjz3339/i/mLFy/mwQcfZLvttitDVR1Xrh3YHTnOa6bSEjcy7HdLSaslfRRfayW9LWmYpFMkvShprqSHJQ2MbfaX1JB4fSzpiFLX7pwrrUxJGieffDKTJk0i5YkdrsiynkKUNIQQ4ttD0rcTi7ai+eCKdG2rInHDzD4A1t9iHwennGxm8yT1Jtzj9aGkHxFujB5jZrOB2rh+L+AVwn1nGXkSR/4qvUavLz/Frq/xt4cUZbt33303/fr1Y9dddy3K9l1mrV0D2wk4lHAj77cS8z8A/quVtvsThrZf0TTDzJ5XMJmUxI1kw3IlbsSjwxrgsVjv7MTip4DvpWl2NHCfmX2YZnvJJA7OHba2tRLKpu9m4Q9IJav0Gr2+/BS7vvr6+rzar1q1ivr6epYtW8bq1aupr6/n448/5swzz2Ty5Mnr3z/++OP06FGw6NiNrq9SFaW+THc4W/Mkin1zWS+lTTUmbpxL6CjTLbsc+EWa+bOAQ1vbtidx5K/Sa/T68lMt9SXTMubOnWt9+vSxgQMH2sCBA61z5842YMAAW7p0adnqq1RlS+IA/iHpJ4TTietPHZpZXY7tkyoycSMaC3w/daak7xFuHfhqyvxtgGHAAznW7pxrR4YNG8Zbb721/v2gQYN49tlnfRRiieQ6iOMGQuLEQcAjQH/CacRsXgD2aGNdbUrcoPkp0Y1K3IjX57qY2ZyU+V8n3A92mJl9ktLsGMLpyTXZP45zrj0YN24c++67LwsWLKB///5cffXV5S6pQ8v1CGxHM/uOpMPN7DpJNxGvE2VRNYkb0ThC5uF6knYD/gR8w8zeytDmrBLU5pyrAOmSNJIaGxtLU4gDcu/Amo4wVkgaCiwjDHbIyKw6EjcSjiEMDEmaTKj19jg89l9mdhiEYf/AAMIRqXPOuRLLtQO7UtLWwDnADMIf9VZzEK0KEjcS67RI1jezr2dZv5FwdOic6yA8iaOy5HQNzMyuMrP3zOwRM9vezGosMTzeOec6Ak/iqCw5dWCS+kq6WtJ98f3OklqNUK60JA5JT6ckaDTExI16SQsS82ri+ttJmi3pHzGN45uJbQ2X9KSkFyTNk5T1xm7nXPXzJI7KkuspxGuBaYTReBCSNG4FMg7BqcQkDjPbJ938+Ev3XWv5SJRfALeZ2R8l7QzcCwxSeL7YjcD3Ldyc/Tk2XCd0znUgnsRRPrl2YL3N7DZJZwGY2VpJn7XSpuqSONIwQmwWhJui/x2nRwNzzez5+LlSn5fWgkdJ5a/Sa/T68lONUVIffvghF154IQ8+mDVJzhVJrh3Y6niUYQCSRgArW2kzFJiTZv63CTmCuxIeDPmMpEdzrANCbuEsM5sg
aTpwAXAgsDNwHWGQCXEfuxHuB1sg6TIzy3Yj87TYKd9J6FQNmAg8KOmncb9NgzoGAybpAaAPcIuZTUrdoEdJFVal1+j15acao6QWLVrEwoUL2WmnnQB4++232WWXXfjjH/+Y9lRjMXmUVOaIpd0JQ9tXxp8LgeGttMkUJXUJUJd4fwPhaGkQuUVJfQIoTp8PnB2nOwErEu2nJtrfB+yXpdZ+8eeWhGiqY+P7U4BT4/S+hGH9nYDTCMP8exOO8p4Evpbt+/AoqfxVeo1eX36qpb5klFSqgQMH2ttvv13Cqjaolu9vY5ElSirrIA5J28VO7jlCjNJ/AP8N7GJmrUU/VU0Sh5m9EX9+ANzEhhunTwBui8uejHX0BpYAj5rZcgshvvcSOnnnXDvmSRyVpbVRiHclpm81sxfMbL7lFp00C+gWT6MBLZI4OkvqQ0ji+HtK20agVlInSQMoYhKHpC7xsSlI2oSQvt80GvJfwNfisi8ROrC3CdmHwyRtHgd0fJVwdOaca8duvvlmli5dypo1a1iyZAknnNB8MHZjY6PfA1ZCrV0DS44JbXGjbzZmVZPE0Q14IHZenYG/AVPjslOBqZJOjrWOj0d/70m6GHgmzr/XzCr36rhzzrVDrXVglmE6J1YFSRxmtpoMpzrN7EXgyxmW3UgYSu+cc64MWjuFuKuk9yV9AAyP0+9L+kDS+6Uo0DnnKkVdXR01NTUMHTq0xbIpU6YgieXLl5ehso4pawdmZp3NbCsz29LMusTppvdbZWtbiTIlcZS7LudcdfAoqcqS6/PA2qTSoqTMbB8zq015zZN0v6TnYyzUFZI6x5pqJT0VO7pnJe0d5x8eo6Wa5u9XzLqdc5XBo6QqS643Mm+0SoySyuIYM3s/1nwH8B3gFkLCx3lmdl/MQZwEjAIeBmbEgSrDCUPth2TbgSdx5K/Sa/T68lONSRzgUVLlVLQOjCqKkjKzput5XYCubBiwkjZKysxWJZpvQYYBLp7EUViVXqPXl59qTOL4+OOPOfPMM5k8efL6948//jg9evQoTNFtqK9SlS2Joy0vMidxHAU8RBiy3pdwr9U25J7EYcDBcXo6ITljE0I0VUOi/SJCp7Mp8DowoJV6HwDeI9zI3DnO+1KsbzHwBjAwsf6RwEvAu8C+rX0fnsSRv0qv0evLT7XUl0zimDt3rvXp08cGDhxoAwcOtM6dO9uAAQNs6dKlZauvUpU8iaNI9gNuNrPPzOxNwhON99qI9p8CTVdR5wGPWLixeh6hE2zysJmtNLOPCfeTDcy2UTM7iNCRdgMOiLN/BJxsZgOAk0mk75vZdDMbAhwB/Goj6nfOtRPDhg3jrbfeorGxkcbGRvr3789zzz3H5z//+XKX1iEUswOrmiipJrGzuxs4PM46DvhLnL6dNIkgZvYosH1Tmodzrv3yKKnKUswOrFqipLpL2iZOdwEOIZwahHDN66tx+gDg5bjejnHAB5J2Jxy1tfpIFedcdfMoqcpStEEcZlUTJbUFMENSN0KHPhtoGnjyX8DvY8f2MXFABuE63rGS1gAfAWMSR4XOOedKoJijEKslSupNMlyDM7P/I81pUDP7HfC7TNt0zlWmuro6Zs6cSU1NDfPnh9tOzznnHO6++246depETU0N1157Ldtuu22ZK3W5KMcgDuecK4t0SRoTJkxg7ty5NDQ0cOihh3L++eeXqTq3scrSgZUroWNjoqQkbZmy3nJJl8Zl4yW9nVj2g0LU55wrrnRJGltttSEVb/Xq1Z6mUUWKegoxnXImdJjZPhux7gdAbdN7SXPYMCIRwvPRTsp1e57Ekb9Kr9Hry09r9RUrSQPg7LPP5vrrr6dHjx7Mnj27aPtxhaVSjz2QdAAw0cxGpswXIVGjWUJHHNwx08yGljqhI1HbYEJ81HZxcEqzOrK0SyZx7HHupVOzrV5WfTeDNz8qdxXZVXqNXl9+WqtvWL/CpFssW7aMs846i2nTprVY9uc//5lPP/2U448/
vsWyVatW0b179xbzK0V7rW///fefY2Z7pl2Y6Q7nYr2osoSO2O5cQkdJYjtLgbmE7MRWt+FJHPmr9Bq9vvyUqr5kkkaq119/PeMy//7y016SODKpyISOaCxwc+L9PcAgMxtO6HSv24g6nXMV5OWXX14/fffddzNkSNZcbldBSn4NjJDQcXQb27YpoSPex9VkoxI64vW5LmY2p2memSVvWr6KcKrSOVfhxo0bR319PcuXL6d///6cd9553HvvvSxYsIBOnToxcOBArrjiitY35CpCOTqwWcCFkn5oZldCi4SO64BehISOCTTvpBqBH0vqBPSjiAkdCeNofvSFpG3MbGl8exjhZmvnXIW7+eabW8xLTdNw1aPkHZhZ1SR0NDmGMDAk6WeSDiMcEb5LuCbmnHOuhMpxBFYVCR2JdbZPM+8s4KzW2jrnnCueShrE4ZxzWdXV1VFTU8PQoUPXz5swYQJDhgxh+PDhHHnkkaxYsaJ8BbqSKmoHVq7EjY0l6e8xaePj+HpN0rBsiRuSPkvMn1GOup3raNJFQR144IHMnz+fuXPnMnjwYH7zm9+UqTpXakU7hVjOxI02uI/wFOZfxAEivcxsuaQ9yJy48ZGZ1Za0Suc6uJEjR9LY2Nhs3ujRo9dPjxgxgjvuuKPEVblyKeY1sP0JQ9vXj0k1s+cVTCYlcSPZsAyJG3XAkFjjOmB5vh8+lUdJ5a/Sa/T6sitmFFSTa665hjFjxhR9P64yFLMDGwrMSTP/24SMwV2B3sAzkh7diO1uAcwyswmSpgMXAAcCOxNuKG46nVcL7Ea472uBpMvMbHHqxiT1jJO/kjQKeBU4Kd5MDXCUpJGEo8aTE9vYVNKzhJGIvzWzu9JsOxklxbnD1m7ExyytvpuFP3CVrNJr9Pqyq6+vz7p81apVra4DIQpq9erVLda98cYbWbFiBf369ctpOxsr1/rKpUPWlymiI98XmSOjLgHqEu9vIBwtDSK3yKhP2JDheD5wdpzuBKxItJ+aaH8fsF+GOnsTjgSPju9PAW6I058DusXp/yZ0nE3t+sWf2xNuA9gh2/fhUVL5q/Qavb785FpfuiioadOm2YgRI2z16tVFqCxoL99fuVRblNQLpHkYZI7alLhB8yPKXBM33gE+ZEPS/O3A7nGb75hZ03auIvF5zOyN+HMRUE842nPOldj999/PpEmTmDFjBptvvnm5y3ElVMwObBbQLZ5GA1okbnSW1IeQuPH3lLaNQK2kTpIGUMTEjdgZ3gOMirO+RrhRGknbJFZdn7ghaWtJ3eJ0b+DLTW2cc8Uzbtw49t13XxYsWED//v25+uqrOemkk/jggw848MADqa2t5cQTTyx3ma5EinYNzKyqEjfOAG6ID6x8G2h6lkKmxI0vAX+StI7wPwG/NTPvwJwrMo+CcklFTeKwKkncMLPXCUeCqfPTJm6Y2RNA2ic5O+ecKw1P4nDOVQ1P4nBJHaoDk/R0Ij2j6eVHUs5VCU/icEkdKkrKzPYxs9qU1zxJXSVdKWmhpJckHRVrGinpOUlrJTV7hpmk7SQ9KOmfkl5MuYbnnCuCkSNH0qtXr2bzRo8eTZcu4WrIiBEjWLJkSTlKc2XgUVLB2cBbZja4KUoqzv8XYeDGaWnaXA/82swektSdMKQ/I0/iyF+l1+j1ZedJHK7QPEoqSBslFQeWEEcbJuvbmfCU5ofieqvSbdSTOAqr0mv0+rLzJI7i6pD1ZbrDOd8XmZM4jgIeAjoTjsb+BWxD7kkcBhwcp6cDDwKbEKKpGhLtFwE9CDdBvw4MyFBnT2AxcDFhuP7tQN+Uda4lJnXE90fEmv4C/AOYTAgD9iSOIqr0Gr2+/HgSR37aa32UKYkjk/2Am83sMwt5g48Ae21E+0+Bpqu484BHzGxNnB6UWO9hM1tpZh8T7icbmGF7XYD+wBNmtjvwJHBRKzV0Ab5COLW4FyFOavxGfAbnXIF4EkfH5VFSWaKkslhCONpbZGZrgbtyaOOc
y5MncbikYl4DmwVcKOmHZnYltIiSuo4wWGIk4abmZCfVCPw4DqjoR5GjpCQ1RUnNIhEllcUzQE9JfczsbeAA4Nli1eicCzyJwyV5lFSQNkpK0l6E62xbA9+SdJ6Z7WJmn0k6DXg4jracA0wtco3OOecSPEqKrFFSzxCuj6Vr8xAwPNt2nXOFVVdXx8yZM6mpqWH+/HDb6IQJE7jnnnvo2rUrO+ywA9OmTaNnz57lLdSVRIdK4nDOVTdP4nBJHSqJI1OUVJYkjm6SbpX0Smw7KGV720laFU8nOueKzJM4XFKHSuIws33SzZd0HumTOE4A3jOzHSWNBX4HJG/zv5jwtOdWeRJH/iq9Rq8vO0/icIXmSRxB2iQO4HBgYpy+A7hckuIAlSMIA01WZ9qoJ3EUVqXX6PVl50kcxdUh68t0h3O+L9pBEgcwH+ifWPdVoDdhFOWT8edE4LTWvg9P4shfpdfo9eXHkzjy017rw5M4Cp7EMZHQOafNQHTOlY4ncXRcnsSRPYnjDWAAgKQuhCO6d4B9gEmSGgn3tf0/SSfl9tGcc23lSRwuyZM4LGsSxwzgOMJR2dHArNh5fqWpvaSJwCozu7xYNTrnAk/icEmexBGkTeIAro7zXwHeBcYWuQ7nnHM58iQOsiZxfAx8p5W2E7Mtd845VxyexOGcqxp1dXXU1NQwdOjQ9fMmTJjAkCFDGD58OEceeSQrVqwoX4GupDyJIyRx1EtakJhXE9ffTtJsSf+QNFfSN+P876ZsY52k2mLW7pzzKCnXnCdxAKFUvmtmqY9E+QVwm5n9UdLOwL3AIDP7M/Dn2HYYcJeZNRStcOccEKKkGhsbm80bPXr0+ukRI0Zwxx13lLgqVy6exJGdAVvF6R7Av9OsMw64pbUNeZRU/iq9Rq8vO4+ScoVWzA5sKOE5Wam+DdQSkjN6A89IenQjtrsFYTj7BEnTgQuAA4GdgesIQ9+J+9iNcD/YAkmXmdniLNudJukz4E5Cp2qEG5YflPTTuN+vp2k3hhA51YJHSRVWpdfo9WXnUVLF1SHryxTRke+LzFFSlwB1ifc3EI6WBpFblNQngOL0+cDZcboTsCLRfmqi/X3Afllq7Rd/bkmIpjo2vj8FODVO70sY1t8p0W4fYF4u34dHSeWv0mv0+vLjUVL5aa/1UaYoqWpJ4sDM3og/PwBuYsON0ycAt8VlT8Y6eieajgVa3lnpnCsZj5LquIrZgc0CusXTaECLJI7OkvoQ7r/6e0rbRqBWUidJAyhiEoekLpJ6x+lNgEMJIb4Qgoa/Fpd9idCBvR3fdyLc49bq9S/nXGF4lJRL8iQO6AY8EDuvzsDfgKlx2anAVEknx1rHJ47+RgKLzWxREWtzziV4lJRL6vBJHGa2mgynOs3sReDLGZbVAyMybdc551xxeRKHc65qeBKHS+pQHVimJI5y1+Wcy40ncbikDhUlZWb7mFltymteliipUyS9GGOkHpa0/qGYkn4naX58+Z2TzpXAyJEj6dWrV7N5o0ePpkuXcDVkxIgRLFmypByluTLoUFFSrUgXJfUPQiLIh5J+REj8GCPpEMJDL2sJg0DqJd1nZu9n2rgnceSv0mv0+rLzJA5XaB4llYWZzU68fQr4XpzeGXjUzNYCayXNBb5BvGcs8Tk8iaOAKr1Gry87T+Iorg5ZX6Y7nPN9kTmJ4yjgIcKQ9b6Ee622IfckDgMOjtPTCckZmxCiqRoS7RcR8gs3BV4HBmSptR6YBzQA5xCTPlLWuRz4RZweTRjqvznhxuZFxMSOTC9P4shfpdfo9eXHkzjy017rI0sSR1GH0WewH3CzmX0GvCnpEWAvYG6O7T8Fmq7izgM+MbM1kuYROsEmD5vZSgBJLwIDgUxZiN81szckbUnIQvw+cH3TQknfA/YEvgpgZg9K2gt4gnBj85OEtA/nXIk1JXE88sgjnsTRwXiUFFmjpJD0deBs4DAz+yTR5tcWBoEcCIjKvK7nXLviSRwuqZhH
YLOACyX90MyuhBZRUtcBvQiJFhNo3kk1Aj+OcU39KHKUFNDTzJYnoqT+FpftBvwJ+IaZvZVo0zm2eSd+puGEU5nOuSLyJA6X5FFS2aOkJsdab48PvfyXmR1GuOb2WJz3PvA9CwM6nHPOlYhHSWWPkkr3/C/M7GPCSETnXAnV1dUxc+ZMampqmD8/3DY6YcIE7rnnHrp27coOO+zAtGnT6NmzZ3kLdSXRoZI4nHPVzZM4XFKHSuLIFCUl6deSFsd7zJLrd5N0q6RXYttBcf7nJM2WtErS5cWs2Tm3gSdxuKQOlcRhZvukmy9pC8J9Xi+nLDoBeM/MdpQ0FvgdMIZwPe8cwinPoeTAkzjyV+k1en3ZeRKHKzRP4gh1PRX3k7rocGBinL4DuFyS4nWz/5O0Y7YvwJM4CqvSa/T6svMkjuLqkPVlusM53xdVlMSR2M+qlPfzgf6J968CvRPvm9WZ7eVJHPmr9Bq9vvx4Ekd+2mt9eBJHq0kczrkq5UkcHZcncWT3BjAA1t/w3AN4pw3bcc4VgCdxuKQOn8TRihnAcYSsw6OBWYnO0zlXYp7E4ZI8iQOQNAn4T2BzSUuAqyzcJH01cIOkV4B3gbGJNo3AVkBXSUcAo83sxWLW6ZxzboMOn8QRl58OtBilaCFx4zsZ2gzKtk3nnHPF5UkczjnnqlI5RiGWjaSnCeG9Sd83s3nlqMc551zbdagOzDIkcTjnnKs+fgrROedcVZKPCi8NSR8AC8pdRxa9geXlLqIVlV6j15cfry8/7bW+gWbWJ92CDnUKscwWmNme5S4iE0nPVnJ9UPk1en358fry0xHr81OIzjnnqpJ3YM4556qSd2Clc2W5C2hFpdcHlV+j15cfry8/Ha4+H8ThnHOuKvkRmHPOuarkHZhzzrmq5B1YgUn6hqQFkl6RdGaa5d0k3RqXP52Swl/s2gZImi3pRUkvSPp5mnVGSVopqSG+zi1VfXH/jZLmxX0/m2a5JP0hfn9zJe1ewtp2SnwvDZLel/Q/KeuU/PuTdI2ktyTNT8zrJekhSS/Hn1tnaHtcXOdlSceVsL7Jkl6K/w2nS+qZoW3W34ci1jdR0huJ/47fzNA267/3ItZ3a6K2RkkNGdqW4vtL+3elJL+DmR7V7K+NfwGdgVeB7YGuhEfG7Jyyzo+BK+L0WODWEta3DbB7nN4SWJimvlHAzDJ+h41A7yzLvwncBwgYATxdxv/Wywg3WZb1+yM8U293YH5i3iTgzDh9JvC7NO16AYviz63j9NYlqm800CVO/y5dfbn8PhSxvonAaTn8DmT9916s+lKWTwHOLeP3l/bvSil+B/0IrLD2Bl4xs0Vm9ilwC3B4yjqHA9fF6TuAr0lSKYozs6Vm9lyc/oDwrLV+pdh3AR0OXG/BU0BPSduUoY6vAa+a2etl2HczZvYo4Xl1Scnfs+uAI9I0PQh4yMzeNbP3gIeAb5SiPjN70MzWxrdPAf0Lvd9cZfj+cpHLv/e8Zasv/u04Bmj5pM8SyfJ3pei/g96BFVY/YHHi/RJadhDr14n/gFcCnytJdQnx1OVuwNNpFu8r6XlJ90napbSVYcCDkuZI+mGa5bl8x6Uwlsx/NMr5/TXpa2ZL4/QyoG+adSrlu6wjHFWn09rvQzGdFE9xXpPh9FclfH9fAd40s5czLC/p95fyd6Xov4PegXVAkroDdwL/Y2bvpyx+jnBabFfgMuCuEpe3n5ntDhwM/ETSyBLvv1WSugKHAbenWVzu768FC+dqKvJ+GUlnA2uBP2dYpVy/D38EdgBqgaWE03SVaBzZj75K9v1l+7tSrN9B78AK6w1gQOJ9/zgv7TqSugA9gHdKUl3Y5yaEX7I/m9lfUpeb2ftmtipO3wtsIql3qeozszfiz7eA6YTTNEm5fMfFdjDwnJm9mbqg3N9fwptNp1bjz7fSrFPW71LSeOBQ4LvxD1wLOfw+FIWZvWlmn5nZOmBqhv2W+/vrAnwbuDXTOqX6/jL8XSn6
76B3YIX1DPBFSV+I/5c+FpiRss4MoGmkzdHArEz/eAstni+/GvinmV2cYZ3PN12Tk7Q34XekJB2spC0kbdk0TbjQPz9ltRnAsQpGACsTpylKJeP/9Zbz+0uR/D07Drg7zToPAKMlbR1PkY2O84pO0jeA04HDzOzDDOvk8vtQrPqS11WPzLDfXP69F9PXgZfMbEm6haX6/rL8XSn+72AxR6d0xBdhlNxCwuiks+O88wn/UAE2JZx6egX4O7B9CWvbj3AYPxdoiK9vAicCJ8Z1TgJeIIyoegr4jxLWt33c7/OxhqbvL1mfgP8vfr/zgD1L/N93C0KH1CMxr6zfH6EzXQqsIVxDOIFwXfVh4GXgb0CvuO6ewFWJtnXxd/EV4PgS1vcK4dpH0+9h08jcbYF7s/0+lKi+G+Lv11zCH+JtUuuL71v8ey9FfXH+tU2/d4l1y/H9Zfq7UvTfQY+Scs45V5X8FKJzzrmq5B2Yc865quQdmHPOuarkHZhzzrmq5B2Yc865quQdmHMFIOkzNU+qH9SGbRwhaecilIekbSXdUYxtZ9lnbaYUd+cKoUu5C3CunfjIzGrz3MYRwEzgxVwbSOpiG0JxMzKzfxNunC+JmBJRS7jn595S7dd1LH4E5lyRSNpD0iMxSPWBRKzOf0l6Jgb+3ilpc0n/QchXnByP4HaQVC9pz9imt6TGOD1e0gxJs4CHY+LCNZL+LukfklokoksapPg8qdj+LoVnNDVKOknSKbHtU5J6xfXqJf0+1jM/Jos0Pefprhh0+5Sk4XH+REk3SHqccCPw+cCY2H6MpL0lPRn384SknRL1/EXS/QrPhJqUqPsbkp6L39XDcV6rn9d1EMW4M9tf/upoL+AzNqQQTAc2AZ4A+sTlY4Br4vTnEu0uAH4ap68Fjk4sqycmjQC9gcY4PZ6QyNCUbHAh8L043ZOQDLFFSn2DiM+Tiu1fITy7qQ/hiQhNSSKXEMJYm/Y/NU6PTLS/DPhlnD4AaIjTE4E5wGaJ/VyeqGErNjwD7OvAnYn1FhFyQTcFXifk4/UhpHV8Ia6X8+f1V8d4+SlE5wqj2SlESUOBocBDMRqxMyEOCGCopAsIf3y707b8wYfMrOkZUaOBwySdFt9vCmxHeC5TJrMtPLvpA0krgXvi/HnA8MR6N0N4JpWkrRSenLwfcFScP0vS5yRtFdefYWYfZdhnD+A6SV8kRA9tklj2sJmtBJD0IjCQ8IDDR83stbivfD6va4e8A3OuOAS8YGb7pll2LXCEmT2vkMg+KsM21rLhNP+mKctWp+zrKDNbsBH1fZKYXpd4v47mfxdSs+Zay55bnWXZrwgd55FxkEt9hno+I/vfprZ8XtcO+TUw54pjAdBH0r4QHjehDQ+33BJYqvAIiu8m2nwQlzVpBPaI09kGYDwA/DSmgiNpt/zLX29M3OZ+hOT/lcBjxLoljQKWW8vnykHLz9ODDY/KGJ/Dvp8CRkr6QtxXrzi/mJ/XVRHvwJwrAguPmD8a+J2k5wnXxv4jLj6H8MTax4GXEs1uASbEgQk7ABcBP5L0D8I1sEx+RTgdN1fSC/F9oXwc938FIaUdwrWuPSTNBX7LhkdmpJoN7Nw0iAOYBPwmbq/Vsz9m9jbwQ+Av8Ttseu5VMT+vqyKeRu+cS0tSPXCamT1b7lqcS8ePwJxzzlUlPwJzzjlXlfwIzDnnXFXyDsw551xV8g7MOedcVfIOzDnnXFXyDsw551xV+v8BfrazJxZrIjQAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.figure(figsize=(12,8))\n",
    "lgb.plot_importance(LGBR, max_num_features=20)\n",
    "plt.title(\"Features\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2a04c9a1",
   "metadata": {},
   "source": [
    "### 分析：\n",
    "- 以下结论基于交叉验证的结果。\n",
    "- 分别用 LASSO、RandomForest、XGBoost、LightGBM 四个模型进行回归分析，寻找最优参数后比较各自的 rmse 值；LightGBM 的值最小，因此选作下游任务的基模型。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b2cc4041",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "pytorch-cv",
   "language": "python",
   "name": "pytorch-cv"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
